diff --git a/0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch b/0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch deleted file mode 100644 index 9602f6ca123124dad77d8be6e7e77944922ebb99..0000000000000000000000000000000000000000 --- a/0001-Backport-LoongArch-Add-relax-feature-and-keep-relocations.patch +++ /dev/null @@ -1,178 +0,0 @@ -From 6f135b13769c64a6942b4b232a350b6a6207f2b2 Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Thu, 16 Nov 2023 11:01:26 +0800 -Subject: [PATCH 02/14] [LoongArch] Add relax feature and keep relocations - (#72191) - -Add relax feature. To support linker relocation, we should make -relocation with a symbol rather than section plus offset, and keep all -relocations with non-abs symbol. - -(cherry picked from commit f5bfc833fcbf17a5876911783d1adaca7028d20c) -Change-Id: Ief38b480016175f2cc9939b74a84d9444559ffd6 ---- - llvm/lib/Target/LoongArch/LoongArch.td | 4 +++ - .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 ++ - .../MCTargetDesc/LoongArchAsmBackend.cpp | 5 +-- - .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 18 ++++++++--- - .../MCTargetDesc/LoongArchMCTargetDesc.h | 2 +- - .../MC/LoongArch/Relocations/relax-attr.s | 32 +++++++++++++++++++ - 6 files changed, 55 insertions(+), 8 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-attr.s - -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 0675caa3b601..75b65fe69f26 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -102,6 +102,10 @@ def FeatureUAL - : SubtargetFeature<"ual", "HasUAL", "true", - "Allow memory accesses to be unaligned">; - -+def FeatureRelax -+ : SubtargetFeature<"relax", "HasLinkerRelax", "true", -+ "Enable Linker relaxation">; -+ - //===----------------------------------------------------------------------===// - // Registers, instruction descriptions ... - //===----------------------------------------------------------------------===// -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -index 0fbe23f2f62d..5c173675cca4 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -@@ -43,6 +43,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { - bool HasLaGlobalWithAbs = false; - bool HasLaLocalWithAbs = false; - bool HasUAL = false; -+ bool HasLinkerRelax = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; -@@ -100,6 +101,7 @@ public: - bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } - bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } - bool hasUAL() const { return HasUAL; } -+ bool hasLinkerRelax() const { return HasLinkerRelax; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index ecb68ff401e9..aae3e544d326 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -168,7 +168,7 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, - return true; - switch (Fixup.getTargetKind()) { - default: -- return false; -+ return STI.hasFeature(LoongArch::FeatureRelax); - case FK_Data_1: - case FK_Data_2: - case FK_Data_4: -@@ -193,7 +193,8 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - - std::unique_ptr - LoongArchAsmBackend::createObjectTargetWriter() const { -- return createLoongArchELFObjectWriter(OSABI, Is64Bit); -+ return createLoongArchELFObjectWriter( -+ OSABI, Is64Bit, STI.hasFeature(LoongArch::FeatureRelax)); - } - - MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -index a6b9c0652639..e60b9c2cfd97 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -@@ -20,19 +20,27 @@ using namespace llvm; - namespace { - class LoongArchELFObjectWriter : public MCELFObjectTargetWriter { - public: -- LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); -+ LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool EnableRelax); - - ~LoongArchELFObjectWriter() override; - -+ bool needsRelocateWithSymbol(const MCSymbol &Sym, -+ unsigned Type) const override { -+ return EnableRelax; -+ } -+ - protected: - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const override; -+ bool EnableRelax; - }; - } // end namespace - --LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) -+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, -+ bool EnableRelax) - : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH, -- /*HasRelocationAddend*/ true) {} -+ /*HasRelocationAddend=*/true), -+ EnableRelax(EnableRelax) {} - - LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} - -@@ -87,6 +95,6 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, - } - - std::unique_ptr --llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) { -- return std::make_unique(OSABI, Is64Bit); -+llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax) { -+ return std::make_unique(OSABI, Is64Bit, Relax); - } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h -index ab35a0096c8a..bb05baa9b717 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h -@@ -36,7 +36,7 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T, - const MCTargetOptions &Options); - - std::unique_ptr --createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); -+createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax); - - } // end namespace llvm - -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s b/llvm/test/MC/LoongArch/Relocations/relax-attr.s -new file mode 100644 -index 000000000000..b1e648d850bb ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s -@@ -0,0 +1,32 @@ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t -+# RUN: llvm-readobj -r %t | FileCheck %s -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t -+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=CHECKR -+ -+# CHECK: Relocations [ -+# CHECK-NEXT: Section ({{.*}}) .rela.data { -+# CHECK-NEXT: 0x0 R_LARCH_64 .text 0x4 -+# CHECK-NEXT: } -+# CHECK-NEXT: ] -+ -+# CHECKR: Relocations [ -+# CHECKR-NEXT: Section ({{.*}}) .rela.text { -+# CHECKR-NEXT: 0x8 R_LARCH_B21 .L1 0x0 -+# CHECKR-NEXT: 0xC R_LARCH_B16 .L1 0x0 -+# CHECKR-NEXT: 0x10 R_LARCH_B26 .L1 0x0 -+# CHECKR-NEXT: } -+# CHECKR-NEXT: Section ({{.*}}) .rela.data { -+# CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0 -+# CHECKR-NEXT: } -+# CHECKR-NEXT: ] -+ -+.text -+ nop -+.L1: -+ nop -+ beqz $a0, .L1 -+ blt $a0, $a1, .L1 -+ b .L1 -+ -+.data -+.dword .L1 --- -2.20.1 - diff --git a/0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch b/0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch deleted file mode 100644 index 496e2687a327e1df3f4fdbe63e670220e322ba5c..0000000000000000000000000000000000000000 --- a/0002-Backport-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocations.patch +++ /dev/null @@ -1,299 +0,0 @@ -From 77d74b8fa071fa2695c9782e2e63e7b930895b1b Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Wed, 20 Dec 2023 10:54:51 +0800 -Subject: [PATCH 03/14] [LoongArch] Allow delayed decision for ADD/SUB - relocations (#72960) - -Refer to RISCV [1], LoongArch also need delayed decision for ADD/SUB -relocations. In handleAddSubRelocations, just return directly if SecA != -SecB, handleFixup usually will finish the the rest of creating PCRel -relocations works. Otherwise we emit relocs depends on whether -relaxation is enabled. If not, we return true and avoid record ADD/SUB -relocations. -Now the two symbols separated by alignment directive will return without -folding symbol offset in AttemptToFoldSymbolOffsetDifference, which has -the same effect when relaxation is enabled. - -[1] https://reviews.llvm.org/D155357 - -(cherry picked from commit a8081ed8ff0fd11fb8d5f4c83df49da909e49612) -Change-Id: Ic4c6a3eb11b576cb0c6ed0eba02150ad67c33cf2 ---- - llvm/lib/MC/MCExpr.cpp | 3 +- - .../MCTargetDesc/LoongArchAsmBackend.cpp | 78 +++++++++++++++++++ - .../MCTargetDesc/LoongArchAsmBackend.h | 9 ++- - .../MCTargetDesc/LoongArchFixupKinds.h | 4 +- - llvm/test/MC/LoongArch/Misc/subsection.s | 38 +++++++++ - .../MC/LoongArch/Relocations/relax-addsub.s | 68 ++++++++++++++++ - 6 files changed, 196 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Misc/subsection.s - create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-addsub.s - -diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp -index a7b980553af0..5a6596f93824 100644 ---- a/llvm/lib/MC/MCExpr.cpp -+++ b/llvm/lib/MC/MCExpr.cpp -@@ -635,7 +635,8 @@ static void AttemptToFoldSymbolOffsetDifference( - // instructions and InSet is false (not expressions in directive like - // .size/.fill), disable the fast path. - if (Layout && (InSet || !SecA.hasInstructions() || -- !Asm->getContext().getTargetTriple().isRISCV())) { -+ !(Asm->getContext().getTargetTriple().isRISCV() || -+ Asm->getContext().getTargetTriple().isLoongArch()))) { - // If both symbols are in the same fragment, return the difference of their - // offsets. canGetFragmentOffset(FA) may be false. - if (FA == FB && !SA.isVariable() && !SB.isVariable()) { -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index aae3e544d326..1ed047a8e632 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -177,6 +177,34 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, - } - } - -+static inline std::pair -+getRelocPairForSize(unsigned Size) { -+ switch (Size) { -+ default: -+ llvm_unreachable("unsupported fixup size"); -+ case 6: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD6), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB6)); -+ case 8: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD8), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB8)); -+ case 16: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD16), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB16)); -+ case 32: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD32), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB32)); -+ case 64: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64)); -+ } -+} -+ - bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const { - // We mostly follow binutils' convention here: align to 4-byte boundary with a -@@ -191,6 +219,56 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - return true; - } - -+bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, -+ const MCFragment &F, -+ const MCFixup &Fixup, -+ const MCValue &Target, -+ uint64_t &FixedValue) const { -+ std::pair FK; -+ uint64_t FixedValueA, FixedValueB; -+ const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); -+ const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); -+ -+ // We need record relocation if SecA != SecB. Usually SecB is same as the -+ // section of Fixup, which will be record the relocation as PCRel. If SecB -+ // is not same as the section of Fixup, it will report error. Just return -+ // false and then this work can be finished by handleFixup. -+ if (&SecA != &SecB) -+ return false; -+ -+ // In SecA == SecB case. If the linker relaxation is enabled, we need record -+ // the ADD, SUB relocations. Otherwise the FixedValue has already been -+ // calculated out in evaluateFixup, return true and avoid record relocations. -+ if (!STI.hasFeature(LoongArch::FeatureRelax)) -+ return true; -+ -+ switch (Fixup.getKind()) { -+ case llvm::FK_Data_1: -+ FK = getRelocPairForSize(8); -+ break; -+ case llvm::FK_Data_2: -+ FK = getRelocPairForSize(16); -+ break; -+ case llvm::FK_Data_4: -+ FK = getRelocPairForSize(32); -+ break; -+ case llvm::FK_Data_8: -+ FK = getRelocPairForSize(64); -+ break; -+ default: -+ llvm_unreachable("unsupported fixup size"); -+ } -+ MCValue A = MCValue::get(Target.getSymA(), nullptr, Target.getConstant()); -+ MCValue B = MCValue::get(Target.getSymB()); -+ auto FA = MCFixup::create(Fixup.getOffset(), nullptr, std::get<0>(FK)); -+ auto FB = MCFixup::create(Fixup.getOffset(), nullptr, std::get<1>(FK)); -+ auto &Asm = Layout.getAssembler(); -+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FA, A, FixedValueA); -+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FB, B, FixedValueB); -+ FixedValue = FixedValueA - FixedValueB; -+ return true; -+} -+ - std::unique_ptr - LoongArchAsmBackend::createObjectTargetWriter() const { - return createLoongArchELFObjectWriter( -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -index ae9bb8af0419..20f25b5cf53b 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -@@ -31,10 +31,15 @@ class LoongArchAsmBackend : public MCAsmBackend { - public: - LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, - const MCTargetOptions &Options) -- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), -- TargetOptions(Options) {} -+ : MCAsmBackend(support::little, -+ LoongArch::fixup_loongarch_relax), -+ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {} - ~LoongArchAsmBackend() override {} - -+ bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F, -+ const MCFixup &Fixup, const MCValue &Target, -+ uint64_t &FixedValue) const override; -+ - void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsResolved, -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -index ba2d6718cdf9..178fa6e5262b 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -@@ -106,7 +106,9 @@ enum Fixups { - // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. - fixup_loongarch_tls_gd_pc_hi20, - // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. -- fixup_loongarch_tls_gd_hi20 -+ fixup_loongarch_tls_gd_hi20, -+ // Generate an R_LARCH_RELAX which indicates the linker may relax here. -+ fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX - }; - } // end namespace LoongArch - } // end namespace llvm -diff --git a/llvm/test/MC/LoongArch/Misc/subsection.s b/llvm/test/MC/LoongArch/Misc/subsection.s -new file mode 100644 -index 000000000000..0bd22b474536 ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Misc/subsection.s -@@ -0,0 +1,38 @@ -+# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,NORELAX --implicit-check-not=error: -+## TODO: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: -+ -+a: -+ nop -+b: -+ la.pcrel $t0, a -+c: -+ nop -+d: -+ -+.data -+## Positive subsection numbers -+## With relaxation, report an error as c-b is not an assemble-time constant. -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection c-b -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection d-b -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection c-a -+ -+.subsection b-a -+.subsection d-c -+ -+## Negative subsection numbers -+# NORELAX: :[[#@LINE+2]]:14: error: subsection number -8 is not within [0,2147483647] -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection b-c -+# NORELAX: :[[#@LINE+2]]:14: error: subsection number -12 is not within [0,2147483647] -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection b-d -+# NORELAX: :[[#@LINE+2]]:14: error: subsection number -12 is not within [0,2147483647] -+# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number -+.subsection a-c -+# ERR: :[[#@LINE+1]]:14: error: subsection number -4 is not within [0,2147483647] -+.subsection a-b -+# ERR: :[[#@LINE+1]]:14: error: subsection number -4 is not within [0,2147483647] -+.subsection c-d -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -new file mode 100644 -index 000000000000..532eb4e0561a ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -@@ -0,0 +1,68 @@ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ -+# RUN: | llvm-readobj -r -x .data - | FileCheck %s --check-prefix=NORELAX -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ -+# RUN: | llvm-readobj -r -x .data - | FileCheck %s --check-prefix=RELAX -+ -+# NORELAX: Relocations [ -+# NORELAX-NEXT: Section ({{.*}}) .rela.text { -+# NORELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .text 0x0 -+# NORELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .text 0x0 -+# NORELAX-NEXT: } -+# NORELAX-NEXT: ] -+ -+# NORELAX: Hex dump of section '.data': -+# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c -+# NORELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000808 -+# NORELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 -+ -+# RELAX: Relocations [ -+# RELAX-NEXT: Section ({{.*}}) .rela.text { -+# RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 -+# RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 -+# RELAX-NEXT: } -+# RELAX-NEXT: Section ({{.*}}) .rela.data { -+# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 -+# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0 -+# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0 -+# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0 -+# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0 -+# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 -+# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 -+# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 -+# RELAX-NEXT: } -+# RELAX-NEXT: ] -+ -+# RELAX: Hex dump of section '.data': -+# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 -+# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000808 -+# RELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 -+ -+.text -+.L1: -+ nop -+.L2: -+ .align 4 -+.L3: -+ la.pcrel $t0, .L1 -+.L4: -+ ret -+ -+.data -+## Not emit relocs -+.byte .L2 - .L1 -+.short .L2 - .L1 -+.word .L2 - .L1 -+.dword .L2 - .L1 -+## With relaxation, emit relocs because of the .align making the diff variable. -+## TODO Handle alignment directive. Why they emit relocs now? They returns -+## without folding symbols offset in AttemptToFoldSymbolOffsetDifference(). -+.byte .L3 - .L2 -+.short .L3 - .L2 -+.word .L3 - .L2 -+.dword .L3 - .L2 -+## TODO -+## With relaxation, emit relocs because la.pcrel is a linker-relaxable inst. -+.byte .L4 - .L3 -+.short .L4 - .L3 -+.word .L4 - .L3 -+.dword .L4 - .L3 --- -2.20.1 - diff --git a/0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch b/0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch deleted file mode 100644 index 93a2174cd4aa11dfd02c98487cafd8ab7ffac5f2..0000000000000000000000000000000000000000 --- a/0003-Backport-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-LoadAddress.patch +++ /dev/null @@ -1,364 +0,0 @@ -From f2495d7efb79fdc82af6147f7201d9cf3c91beba Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Wed, 27 Dec 2023 08:51:48 +0800 -Subject: [PATCH 04/14] [LoongArch] Emit R_LARCH_RELAX when expanding some - LoadAddress (#72961) - -Emit relax relocs when expand non-large la.pcrel and non-large la.got on -llvm-mc stage, which like what does on GAS. -1, la.pcrel -> PCALA_HI20 + RELAX + PCALA_LO12 + RELAX -2, la.got -> GOT_PC_HI20 + RELAX + GOT_PC_LO12 + RELAX - -(cherry picked from commit b3ef8dce9811b2725639b0d4fac3f85c7e112817) -Change-Id: I222daf60b36ee70e23c76b753e1d2a3b8148f44b ---- - .../AsmParser/LoongArchAsmParser.cpp | 12 +-- - .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 13 +++ - .../MCTargetDesc/LoongArchMCExpr.cpp | 7 +- - .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 8 +- - llvm/test/MC/LoongArch/Macros/macros-la.s | 84 ++++++++++++++++--- - llvm/test/MC/LoongArch/Misc/subsection.s | 2 +- - .../MC/LoongArch/Relocations/relax-addsub.s | 16 +++- - 7 files changed, 115 insertions(+), 27 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -index 94d530306536..a132e645c864 100644 ---- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp -@@ -86,7 +86,7 @@ class LoongArchAsmParser : public MCTargetAsmParser { - // "emitLoadAddress*" functions. - void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, - const MCExpr *Symbol, SmallVectorImpl &Insts, -- SMLoc IDLoc, MCStreamer &Out); -+ SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false); - - // Helper to emit pseudo instruction "la.abs $rd, sym". - void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); -@@ -749,12 +749,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, - void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, - const MCExpr *Symbol, - SmallVectorImpl &Insts, -- SMLoc IDLoc, MCStreamer &Out) { -+ SMLoc IDLoc, MCStreamer &Out, -+ bool RelaxHint) { - MCContext &Ctx = getContext(); - for (LoongArchAsmParser::Inst &Inst : Insts) { - unsigned Opc = Inst.Opc; - LoongArchMCExpr::VariantKind VK = Inst.VK; -- const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx); -+ const LoongArchMCExpr *LE = -+ LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint); - switch (Opc) { - default: - llvm_unreachable("unexpected opcode"); -@@ -855,7 +857,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc, - Insts.push_back( - LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12)); - -- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); -+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); - } - - void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc, -@@ -901,7 +903,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc, - Insts.push_back( - LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); - -- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); -+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); - } - - void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc, -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -index 03fb9e008ae9..08c0820cb862 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -@@ -19,6 +19,7 @@ - #include "llvm/MC/MCInstBuilder.h" - #include "llvm/MC/MCInstrInfo.h" - #include "llvm/MC/MCRegisterInfo.h" -+#include "llvm/MC/MCSubtargetInfo.h" - #include "llvm/Support/Casting.h" - #include "llvm/Support/EndianStream.h" - -@@ -120,12 +121,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - assert(MO.isExpr() && "getExprOpValue expects only expressions"); -+ bool RelaxCandidate = false; -+ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax); - const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid; - if (Kind == MCExpr::Target) { - const LoongArchMCExpr *LAExpr = cast(Expr); - -+ RelaxCandidate = LAExpr->getRelaxHint(); - switch (LAExpr->getKind()) { - case LoongArchMCExpr::VK_LoongArch_None: - case LoongArchMCExpr::VK_LoongArch_Invalid: -@@ -269,6 +273,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, - - Fixups.push_back( - MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); -+ -+ // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax -+ // hint. -+ if (EnableRelax && RelaxCandidate) { -+ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx); -+ Fixups.push_back(MCFixup::create( -+ 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc())); -+ } -+ - return 0; - } - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -index 993111552a31..82c992b1cc8c 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp -@@ -25,9 +25,10 @@ using namespace llvm; - - #define DEBUG_TYPE "loongarch-mcexpr" - --const LoongArchMCExpr * --LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { -- return new (Ctx) LoongArchMCExpr(Expr, Kind); -+const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr, -+ VariantKind Kind, MCContext &Ctx, -+ bool Hint) { -+ return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint); - } - - void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -index 0945cf82db86..93251f824103 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h -@@ -67,16 +67,18 @@ public: - private: - const MCExpr *Expr; - const VariantKind Kind; -+ const bool RelaxHint; - -- explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind) -- : Expr(Expr), Kind(Kind) {} -+ explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint) -+ : Expr(Expr), Kind(Kind), RelaxHint(Hint) {} - - public: - static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind, -- MCContext &Ctx); -+ MCContext &Ctx, bool Hint = false); - - VariantKind getKind() const { return Kind; } - const MCExpr *getSubExpr() const { return Expr; } -+ bool getRelaxHint() const { return RelaxHint; } - - void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; - bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, -diff --git a/llvm/test/MC/LoongArch/Macros/macros-la.s b/llvm/test/MC/LoongArch/Macros/macros-la.s -index 924e4326b8e5..1a1d12d7d7df 100644 ---- a/llvm/test/MC/LoongArch/Macros/macros-la.s -+++ b/llvm/test/MC/LoongArch/Macros/macros-la.s -@@ -1,66 +1,128 @@ - # RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t -+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax -+# RUN: llvm-readobj -r %t.relax | FileCheck %s --check-prefixes=RELOC,RELAX -+ -+# RELOC: Relocations [ -+# RELOC-NEXT: Section ({{.*}}) .rela.text { - - la.abs $a0, sym_abs - # CHECK: lu12i.w $a0, %abs_hi20(sym_abs) - # CHECK-NEXT: ori $a0, $a0, %abs_lo12(sym_abs) - # CHECK-NEXT: lu32i.d $a0, %abs64_lo20(sym_abs) - # CHECK-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_abs) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_ABS_HI20 sym_abs 0x0 -+# RELOC-NEXT: R_LARCH_ABS_LO12 sym_abs 0x0 -+# RELOC-NEXT: R_LARCH_ABS64_LO20 sym_abs 0x0 -+# RELOC-NEXT: R_LARCH_ABS64_HI12 sym_abs 0x0 - - la.pcrel $a0, sym_pcrel --# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel) -+# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel) - # CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(sym_pcrel) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel 0x0 -+# RELAX-NEXT: R_LARCH_RELAX - 0x0 -+# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel 0x0 -+# RELAX-NEXT: R_LARCH_RELAX - 0x0 - - la.pcrel $a0, $a1, sym_pcrel_large --# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel_large) -+# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel_large) - # CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(sym_pcrel_large) - # CHECK-NEXT: lu32i.d $a1, %pc64_lo20(sym_pcrel_large) - # CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_pcrel_large) - # CHECK-NEXT: add.d $a0, $a0, $a1 -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel_large 0x0 -+# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel_large 0x0 -+# RELOC-NEXT: R_LARCH_PCALA64_LO20 sym_pcrel_large 0x0 -+# RELOC-NEXT: R_LARCH_PCALA64_HI12 sym_pcrel_large 0x0 - - la.got $a0, sym_got --# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got) -+# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got) - # CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym_got) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got 0x0 -+# RELAX-NEXT: R_LARCH_RELAX - 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got 0x0 -+# RELAX-NEXT: R_LARCH_RELAX - 0x0 - - la.got $a0, $a1, sym_got_large --# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got_large) -+# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got_large) - # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_got_large) - # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_got_large) - # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_got_large) - # CHECK-NEXT: ldx.d $a0, $a0, $a1 -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_got_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_got_large 0x0 - - la.tls.le $a0, sym_le --# CHECK: lu12i.w $a0, %le_hi20(sym_le) -+# CHECK-NEXT: lu12i.w $a0, %le_hi20(sym_le) - # CHECK-NEXT: ori $a0, $a0, %le_lo12(sym_le) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_LE_HI20 sym_le 0x0 -+# RELOC-NEXT: R_LARCH_TLS_LE_LO12 sym_le 0x0 - - la.tls.ie $a0, sym_ie --# CHECK: pcalau12i $a0, %ie_pc_hi20(sym_ie) -+# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie) - # CHECK-NEXT: ld.d $a0, $a0, %ie_pc_lo12(sym_ie) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie 0x0 -+# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie 0x0 - - la.tls.ie $a0, $a1, sym_ie_large --# CHECK: pcalau12i $a0, %ie_pc_hi20(sym_ie_large) -+# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie_large) - # CHECK-NEXT: addi.d $a1, $zero, %ie_pc_lo12(sym_ie_large) - # CHECK-NEXT: lu32i.d $a1, %ie64_pc_lo20(sym_ie_large) - # CHECK-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large) - # CHECK-NEXT: ldx.d $a0, $a0, $a1 -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie_large 0x0 -+# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie_large 0x0 -+# RELOC-NEXT: R_LARCH_TLS_IE64_PC_LO20 sym_ie_large 0x0 -+# RELOC-NEXT: R_LARCH_TLS_IE64_PC_HI12 sym_ie_large 0x0 - - la.tls.ld $a0, sym_ld --# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld) -+# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld) - # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_ld) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld 0x0 - - la.tls.ld $a0, $a1, sym_ld_large --# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld_large) -+# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld_large) - # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_ld_large) - # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_ld_large) - # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_ld_large) - # CHECK-NEXT: add.d $a0, $a0, $a1 -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_ld_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_ld_large 0x0 - - la.tls.gd $a0, sym_gd --# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd) -+# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd) - # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_gd) -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd 0x0 - - la.tls.gd $a0, $a1, sym_gd_large --# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd_large) -+# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd_large) - # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_gd_large) - # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_gd_large) - # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_gd_large) - # CHECK-NEXT: add.d $a0, $a0, $a1 -+# CHECK-EMPTY: -+# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_gd_large 0x0 -+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_gd_large 0x0 -+ -+# RELOC-NEXT: } -+# RELOC-NEXT: ] -diff --git a/llvm/test/MC/LoongArch/Misc/subsection.s b/llvm/test/MC/LoongArch/Misc/subsection.s -index 0bd22b474536..566a2408d691 100644 ---- a/llvm/test/MC/LoongArch/Misc/subsection.s -+++ b/llvm/test/MC/LoongArch/Misc/subsection.s -@@ -1,5 +1,5 @@ - # RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,NORELAX --implicit-check-not=error: --## TODO: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: -+# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: - - a: - nop -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -index 532eb4e0561a..c4454f5bb98d 100644 ---- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -@@ -18,7 +18,9 @@ - # RELAX: Relocations [ - # RELAX-NEXT: Section ({{.*}}) .rela.text { - # RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 -+# RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 -+# RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: Section ({{.*}}) .rela.data { - # RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 -@@ -29,13 +31,21 @@ - # RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 - # RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 - # RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 -+# RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 -+# RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 -+# RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 -+# RELAX-NEXT: 0x1F R_LARCH_SUB16 .L3 0x0 -+# RELAX-NEXT: 0x21 R_LARCH_ADD32 .L4 0x0 -+# RELAX-NEXT: 0x21 R_LARCH_SUB32 .L3 0x0 -+# RELAX-NEXT: 0x25 R_LARCH_ADD64 .L4 0x0 -+# RELAX-NEXT: 0x25 R_LARCH_SUB64 .L3 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: ] - - # RELAX: Hex dump of section '.data': - # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 --# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000808 --# RELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 -+# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 -+# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 - - .text - .L1: -@@ -60,8 +70,6 @@ - .short .L3 - .L2 - .word .L3 - .L2 - .dword .L3 - .L2 --## TODO --## With relaxation, emit relocs because la.pcrel is a linker-relaxable inst. - .byte .L4 - .L3 - .short .L4 - .L3 - .word .L4 - .L3 --- -2.20.1 - diff --git a/0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch b/0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch deleted file mode 100644 index 72b79246dd2bc7bdd5f38d34d0d6237701dac6ef..0000000000000000000000000000000000000000 --- a/0004-Backport-MC-LoongArch-Add-AlignFragment-size-if-layout-is-available-and-not-need-insert-nops.patch +++ /dev/null @@ -1,123 +0,0 @@ -From be6e5c566f49bee5efe3d710bdd321e15d8d95ea Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Thu, 14 Mar 2024 12:10:50 +0800 -Subject: [PATCH 05/14] [MC][LoongArch] Add AlignFragment size if layout is - available and not need insert nops (#76552) - -Due to delayed decision for ADD/SUB relocations, RISCV and LoongArch may -go slow fragment walk path with available layout. When RISCV (or -LoongArch in the future) don't need insert nops, that means relax is -disabled. With available layout and not needing insert nops, the size of -AlignFragment should be a constant. So we can add it to Displacement for -folding A-B. - -(cherry picked from commit 0731567a31e4ade97c27801045156a88c4589704) -Change-Id: I554d6766bd7f688204e956e4a6431574b4c511c9 ---- - llvm/lib/MC/MCExpr.cpp | 6 +++++ - llvm/test/MC/LoongArch/Misc/cfi-advance.s | 27 +++++++++++++++++++ - .../MC/LoongArch/Relocations/relax-addsub.s | 17 +++--------- - 3 files changed, 37 insertions(+), 13 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Misc/cfi-advance.s - -diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp -index 5a6596f93824..a561fed11179 100644 ---- a/llvm/lib/MC/MCExpr.cpp -+++ b/llvm/lib/MC/MCExpr.cpp -@@ -707,8 +707,14 @@ static void AttemptToFoldSymbolOffsetDifference( - } - - int64_t Num; -+ unsigned Count; - if (DF) { - Displacement += DF->getContents().size(); -+ } else if (auto *AF = dyn_cast(FI); -+ AF && Layout && -+ !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( -+ *AF, Count)) { -+ Displacement += Asm->computeFragmentSize(*Layout, *AF); - } else if (auto *FF = dyn_cast(FI); - FF && FF->getNumValues().evaluateAsAbsolute(Num)) { - Displacement += Num * FF->getValueSize(); -diff --git a/llvm/test/MC/LoongArch/Misc/cfi-advance.s b/llvm/test/MC/LoongArch/Misc/cfi-advance.s -new file mode 100644 -index 000000000000..662c43e6bcea ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Misc/cfi-advance.s -@@ -0,0 +1,27 @@ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=-relax %s -o %t.o -+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOC %s -+# RUN: llvm-dwarfdump --debug-frame %t.o | FileCheck --check-prefix=DWARFDUMP %s -+ -+# RELOC: Relocations [ -+# RELOC-NEXT: .rela.eh_frame { -+# RELOC-NEXT: 0x1C R_LARCH_32_PCREL .text 0x0 -+# RELOC-NEXT: } -+# RELOC-NEXT: ] -+# DWARFDUMP: DW_CFA_advance_loc: 4 -+# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 -+# DWARFDUMP-NEXT: DW_CFA_advance_loc: 8 -+# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 -+ -+ .text -+ .globl test -+ .p2align 2 -+ .type test,@function -+test: -+ .cfi_startproc -+ nop -+ .cfi_def_cfa_offset 8 -+ .p2align 3 -+ nop -+ .cfi_def_cfa_offset 8 -+ nop -+ .cfi_endproc -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -index c4454f5bb98d..14922657ae89 100644 ---- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -@@ -23,14 +23,6 @@ - # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: Section ({{.*}}) .rela.data { --# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 --# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0 --# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0 --# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0 --# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0 --# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 --# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 --# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 - # RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 - # RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 - # RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 -@@ -43,8 +35,8 @@ - # RELAX-NEXT: ] - - # RELAX: Hex dump of section '.data': --# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 --# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 -+# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c -+# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000 - # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 - - .text -@@ -63,13 +55,12 @@ - .short .L2 - .L1 - .word .L2 - .L1 - .dword .L2 - .L1 --## With relaxation, emit relocs because of the .align making the diff variable. --## TODO Handle alignment directive. Why they emit relocs now? They returns --## without folding symbols offset in AttemptToFoldSymbolOffsetDifference(). -+## TODO Handle alignment directive. - .byte .L3 - .L2 - .short .L3 - .L2 - .word .L3 - .L2 - .dword .L3 - .L2 -+## With relaxation, emit relocs because the la.pcrel makes the diff variable. - .byte .L4 - .L3 - .short .L4 - .L3 - .word .L4 - .L3 --- -2.20.1 - diff --git a/0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch b/0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch deleted file mode 100644 index c1c4f9ff301e8f5876ab5825fd9711387b9d5365..0000000000000000000000000000000000000000 --- a/0005-Backport-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_RISCV_-SET-SUB-_ULEB128-for-uleb128-directives.patch +++ /dev/null @@ -1,633 +0,0 @@ -From 8d7b71890179d32474b3a1a1c627481bd5a2327d Mon Sep 17 00:00:00 2001 -From: zhanglimin -Date: Fri, 15 Mar 2024 14:39:48 +0800 -Subject: [PATCH 06/14] [LoongArch][RISCV] Support - R_LARCH_{ADD,SUB}_ULEB128/R_RISCV_{SET,SUB}_ULEB128 for .uleb128 directives - -This patch is originally from three upstream commits: -1, R_LARCH_{ADD,SUB}_ULEB128 are originally landed from b57159cb(#76433). -2, R_RISCV_{SET,SUB}_ULEB128 are originally supported from 1df5ea29. Among it, we change -the default behaviour of `-riscv-uleb128-reloc` to not produce uleb128 reloc, in order -to avoid any other side-effects due to the updated implementation of `MCAssembler::relaxLEB()` -function. And at the same time, we ensure that this patch can't introduce new default traits -(such as the generation for uleb128 reloc) on RISCV in this version. -3, Fix invalid-sleb.s in original commit d7398a35. - -Change-Id: Ie687b7d8483c76cf647141162641db1a9d819a04 ---- - .../llvm/BinaryFormat/ELFRelocs/RISCV.def | 2 + - llvm/include/llvm/MC/MCAsmBackend.h | 8 +++ - llvm/include/llvm/MC/MCFixup.h | 1 + - llvm/include/llvm/MC/MCFragment.h | 9 ++- - llvm/lib/MC/MCAsmBackend.cpp | 1 + - llvm/lib/MC/MCAssembler.cpp | 39 ++++++++-- - .../MCTargetDesc/LoongArchAsmBackend.cpp | 69 ++++++++++++++---- - .../MCTargetDesc/LoongArchAsmBackend.h | 3 + - .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 27 +++++++ - .../RISCV/MCTargetDesc/RISCVAsmBackend.h | 2 + - llvm/test/MC/ELF/RISCV/gen-dwarf.s | 5 +- - llvm/test/MC/LoongArch/Relocations/leb128.s | 72 +++++++++++++++++++ - .../MC/LoongArch/Relocations/relax-addsub.s | 57 +++++++++++---- - llvm/test/MC/X86/invalid-sleb.s | 5 -- - 14 files changed, 252 insertions(+), 48 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Relocations/leb128.s - delete mode 100644 llvm/test/MC/X86/invalid-sleb.s - -diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def -index 9a126df01531..c7fd6490041c 100644 ---- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def -+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def -@@ -55,3 +55,5 @@ ELF_RELOC(R_RISCV_SET32, 56) - ELF_RELOC(R_RISCV_32_PCREL, 57) - ELF_RELOC(R_RISCV_IRELATIVE, 58) - ELF_RELOC(R_RISCV_PLT32, 59) -+ELF_RELOC(R_RISCV_SET_ULEB128, 60) -+ELF_RELOC(R_RISCV_SUB_ULEB128, 61) -diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h -index 5e08fb41679b..968a767b17f8 100644 ---- a/llvm/include/llvm/MC/MCAsmBackend.h -+++ b/llvm/include/llvm/MC/MCAsmBackend.h -@@ -21,6 +21,7 @@ class MCAlignFragment; - class MCDwarfCallFrameFragment; - class MCDwarfLineAddrFragment; - class MCFragment; -+class MCLEBFragment; - class MCRelaxableFragment; - class MCSymbol; - class MCAsmLayout; -@@ -194,6 +195,13 @@ public: - return false; - } - -+ // Defined by linker relaxation targets to possibly emit LEB128 relocations -+ // and set Value at the relocated location. -+ virtual std::pair -+ relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, int64_t &Value) const { -+ return std::make_pair(false, false); -+ } -+ - /// @} - - /// Returns the minimum size of a nop in bytes on this target. The assembler -diff --git a/llvm/include/llvm/MC/MCFixup.h b/llvm/include/llvm/MC/MCFixup.h -index 069ca058310f..7f48a90cb1ec 100644 ---- a/llvm/include/llvm/MC/MCFixup.h -+++ b/llvm/include/llvm/MC/MCFixup.h -@@ -25,6 +25,7 @@ enum MCFixupKind { - FK_Data_4, ///< A four-byte fixup. - FK_Data_8, ///< A eight-byte fixup. - FK_Data_6b, ///< A six-bits fixup. -+ FK_Data_leb128, ///< A leb128 fixup. - FK_PCRel_1, ///< A one-byte pc relative fixup. - FK_PCRel_2, ///< A two-byte pc relative fixup. - FK_PCRel_4, ///< A four-byte pc relative fixup. -diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h -index 7be4792a4521..e965732010fe 100644 ---- a/llvm/include/llvm/MC/MCFragment.h -+++ b/llvm/include/llvm/MC/MCFragment.h -@@ -428,7 +428,7 @@ public: - } - }; - --class MCLEBFragment : public MCFragment { -+class MCLEBFragment final : public MCEncodedFragmentWithFixups<10, 1> { - /// True if this is a sleb128, false if uleb128. - bool IsSigned; - -@@ -439,17 +439,16 @@ class MCLEBFragment : public MCFragment { - - public: - MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) -- : MCFragment(FT_LEB, false, Sec), IsSigned(IsSigned_), Value(&Value_) { -+ : MCEncodedFragmentWithFixups<10, 1>(FT_LEB, false, Sec), -+ IsSigned(IsSigned_), Value(&Value_) { - Contents.push_back(0); - } - - const MCExpr &getValue() const { return *Value; } -+ void setValue(const MCExpr *Expr) { Value = Expr; } - - bool isSigned() const { return IsSigned; } - -- SmallString<8> &getContents() { return Contents; } -- const SmallString<8> &getContents() const { return Contents; } -- - /// @} - - static bool classof(const MCFragment *F) { -diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp -index 64bbc63719c7..2eef7d363fe7 100644 ---- a/llvm/lib/MC/MCAsmBackend.cpp -+++ b/llvm/lib/MC/MCAsmBackend.cpp -@@ -89,6 +89,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { - {"FK_Data_4", 0, 32, 0}, - {"FK_Data_8", 0, 64, 0}, - {"FK_Data_6b", 0, 6, 0}, -+ {"FK_Data_leb128", 0, 0, 0}, - {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, - {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, - {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, -diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp -index 55ed1a285cd7..86c798ec9e27 100644 ---- a/llvm/lib/MC/MCAssembler.cpp -+++ b/llvm/lib/MC/MCAssembler.cpp -@@ -918,6 +918,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) { - Contents = DF.getContents(); - break; - } -+ case MCFragment::FT_LEB: { -+ auto &LF = cast(Frag); -+ Fixups = LF.getFixups(); -+ Contents = LF.getContents(); -+ break; -+ } - case MCFragment::FT_PseudoProbe: { - MCPseudoProbeAddrFragment &PF = cast(Frag); - Fixups = PF.getFixups(); -@@ -1006,12 +1012,31 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, - } - - bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { -- uint64_t OldSize = LF.getContents().size(); -+ const unsigned OldSize = static_cast(LF.getContents().size()); -+ unsigned PadTo = OldSize; - int64_t Value; -- bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout); -- if (!Abs) -- report_fatal_error("sleb128 and uleb128 expressions must be absolute"); -- SmallString<8> &Data = LF.getContents(); -+ SmallVectorImpl &Data = LF.getContents(); -+ LF.getFixups().clear(); -+ // Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols -+ // requires that .uleb128 A-B is foldable where A and B reside in different -+ // fragments. This is used by __gcc_except_table. -+ bool Abs = getSubsectionsViaSymbols() -+ ? LF.getValue().evaluateKnownAbsolute(Value, Layout) -+ : LF.getValue().evaluateAsAbsolute(Value, Layout); -+ if (!Abs) { -+ bool Relaxed, UseZeroPad; -+ std::tie(Relaxed, UseZeroPad) = getBackend().relaxLEB128(LF, Layout, Value); -+ if (!Relaxed) { -+ getContext().reportError(LF.getValue().getLoc(), -+ Twine(LF.isSigned() ? ".s" : ".u") + -+ "leb128 expression is not absolute"); -+ LF.setValue(MCConstantExpr::create(0, Context)); -+ } -+ uint8_t Tmp[10]; // maximum size: ceil(64/7) -+ PadTo = std::max(PadTo, encodeULEB128(uint64_t(Value), Tmp)); -+ if (UseZeroPad) -+ Value = 0; -+ } - Data.clear(); - raw_svector_ostream OSE(Data); - // The compiler can generate EH table assembly that is impossible to assemble -@@ -1019,9 +1044,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { - // to a later alignment fragment. To accommodate such tables, relaxation can - // only increase an LEB fragment size here, not decrease it. See PR35809. - if (LF.isSigned()) -- encodeSLEB128(Value, OSE, OldSize); -+ encodeSLEB128(Value, OSE, PadTo); - else -- encodeULEB128(Value, OSE, OldSize); -+ encodeULEB128(Value, OSE, PadTo); - return OldSize != LF.getContents().size(); - } - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index 1ed047a8e632..9227d4d6afed 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -92,6 +92,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: -+ case FK_Data_leb128: - return Value; - case LoongArch::fixup_loongarch_b16: { - if (!isInt<18>(Value)) -@@ -129,6 +130,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - } - } - -+static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup, -+ MutableArrayRef Data, uint64_t Value) { -+ unsigned I; -+ for (I = 0; I != Data.size() && Value; ++I, Value >>= 7) -+ Data[I] |= uint8_t(Value & 0x7f); -+ if (Value) -+ Ctx.reportError(Fixup.getLoc(), "Invalid uleb128 value!"); -+} -+ - void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCValue &Target, -@@ -144,6 +154,10 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, - MCFixupKindInfo Info = getFixupKindInfo(Kind); - MCContext &Ctx = Asm.getContext(); - -+ // Fixup leb128 separately. -+ if (Fixup.getTargetKind() == FK_Data_leb128) -+ return fixupLeb128(Ctx, Fixup, Data, Value); -+ - // Apply any target-specific value adjustments. - Value = adjustFixupValue(Fixup, Value, Ctx); - -@@ -173,6 +187,7 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: -+ case FK_Data_leb128: - return !Target.isAbsolute(); - } - } -@@ -202,9 +217,24 @@ getRelocPairForSize(unsigned Size) { - return std::make_pair( - MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), - MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64)); -+ case 128: -+ return std::make_pair( -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD_ULEB128), -+ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB_ULEB128)); - } - } - -+std::pair LoongArchAsmBackend::relaxLEB128(MCLEBFragment &LF, -+ MCAsmLayout &Layout, -+ int64_t &Value) const { -+ const MCExpr &Expr = LF.getValue(); -+ if (LF.isSigned() || !Expr.evaluateKnownAbsolute(Value, Layout)) -+ return std::make_pair(false, false); -+ LF.getFixups().push_back( -+ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); -+ return std::make_pair(true, true); -+} -+ - bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const { - // We mostly follow binutils' convention here: align to 4-byte boundary with a -@@ -226,21 +256,27 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, - uint64_t &FixedValue) const { - std::pair FK; - uint64_t FixedValueA, FixedValueB; -- const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); -- const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); -- -- // We need record relocation if SecA != SecB. Usually SecB is same as the -- // section of Fixup, which will be record the relocation as PCRel. If SecB -- // is not same as the section of Fixup, it will report error. Just return -- // false and then this work can be finished by handleFixup. -- if (&SecA != &SecB) -- return false; -- -- // In SecA == SecB case. If the linker relaxation is enabled, we need record -- // the ADD, SUB relocations. Otherwise the FixedValue has already been -- // calculated out in evaluateFixup, return true and avoid record relocations. -- if (!STI.hasFeature(LoongArch::FeatureRelax)) -- return true; -+ const MCSymbol &SA = Target.getSymA()->getSymbol(); -+ const MCSymbol &SB = Target.getSymB()->getSymbol(); -+ -+ bool force = !SA.isInSection() || !SB.isInSection(); -+ if (!force) { -+ const MCSection &SecA = SA.getSection(); -+ const MCSection &SecB = SB.getSection(); -+ -+ // We need record relocation if SecA != SecB. Usually SecB is same as the -+ // section of Fixup, which will be record the relocation as PCRel. If SecB -+ // is not same as the section of Fixup, it will report error. Just return -+ // false and then this work can be finished by handleFixup. -+ if (&SecA != &SecB) -+ return false; -+ -+ // In SecA == SecB case. If the linker relaxation is enabled, we need record -+ // the ADD, SUB relocations. Otherwise the FixedValue has already been calc- -+ // ulated out in evaluateFixup, return true and avoid record relocations. -+ if (!STI.hasFeature(LoongArch::FeatureRelax)) -+ return true; -+ } - - switch (Fixup.getKind()) { - case llvm::FK_Data_1: -@@ -255,6 +291,9 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, - case llvm::FK_Data_8: - FK = getRelocPairForSize(64); - break; -+ case llvm::FK_Data_leb128: -+ FK = getRelocPairForSize(128); -+ break; - default: - llvm_unreachable("unsupported fixup size"); - } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -index 20f25b5cf53b..49801e4fd81a 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -@@ -65,6 +65,9 @@ public: - void relaxInstruction(MCInst &Inst, - const MCSubtargetInfo &STI) const override {} - -+ std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, -+ int64_t &Value) const override; -+ - bool writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const override; - -diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp -index 1b890fbe041a..5c651aa93225 100644 ---- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp -+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp -@@ -19,6 +19,7 @@ - #include "llvm/MC/MCObjectWriter.h" - #include "llvm/MC/MCSymbol.h" - #include "llvm/MC/MCValue.h" -+#include "llvm/Support/CommandLine.h" - #include "llvm/Support/Endian.h" - #include "llvm/Support/EndianStream.h" - #include "llvm/Support/ErrorHandling.h" -@@ -27,6 +28,13 @@ - - using namespace llvm; - -+// Temporary workaround for old linkers that do not support ULEB128 relocations, -+// which are abused by DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair -+// implemented in Clang/LLVM. -+static cl::opt ULEB128Reloc( -+ "riscv-uleb128-reloc", cl::init(false), cl::Hidden, -+ cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); -+ - std::optional RISCVAsmBackend::getFixupKind(StringRef Name) const { - if (STI.getTargetTriple().isOSBinFormatELF()) { - unsigned Type; -@@ -126,6 +134,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: -+ case FK_Data_leb128: - if (Target.isAbsolute()) - return false; - break; -@@ -330,6 +339,19 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, - return true; - } - -+std::pair RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, -+ MCAsmLayout &Layout, -+ int64_t &Value) const { -+ if (LF.isSigned()) -+ return std::make_pair(false, false); -+ const MCExpr &Expr = LF.getValue(); -+ if (ULEB128Reloc) { -+ LF.getFixups().push_back( -+ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); -+ } -+ return std::make_pair(Expr.evaluateKnownAbsolute(Value, Layout), false); -+} -+ - // Given a compressed control flow instruction this function returns - // the expanded instruction. - unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const { -@@ -416,6 +438,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - case FK_Data_4: - case FK_Data_8: - case FK_Data_6b: -+ case FK_Data_leb128: - return Value; - case RISCV::fixup_riscv_set_6b: - return Value & 0x03; -@@ -596,6 +619,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, - TA = ELF::R_RISCV_ADD64; - TB = ELF::R_RISCV_SUB64; - break; -+ case llvm::FK_Data_leb128: -+ TA = ELF::R_RISCV_SET_ULEB128; -+ TB = ELF::R_RISCV_SUB_ULEB128; -+ break; - default: - llvm_unreachable("unsupported fixup size"); - } -diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h -index 0ea1f32e8296..edefb171bcdc 100644 ---- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h -+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h -@@ -99,6 +99,8 @@ public: - bool &WasRelaxed) const override; - bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, - bool &WasRelaxed) const override; -+ std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, -+ int64_t &Value) const override; - - bool writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const override; -diff --git a/llvm/test/MC/ELF/RISCV/gen-dwarf.s b/llvm/test/MC/ELF/RISCV/gen-dwarf.s -index 2235559d5f35..2a7dc777e70c 100644 ---- a/llvm/test/MC/ELF/RISCV/gen-dwarf.s -+++ b/llvm/test/MC/ELF/RISCV/gen-dwarf.s -@@ -9,7 +9,7 @@ - ## emit special opcodes to make .debug_line smaller, but we don't do this for - ## consistency. - --# RUN: llvm-mc -filetype=obj -triple=riscv64 -g -dwarf-version=5 -mattr=+relax < %s -o %t -+# RUN: llvm-mc -filetype=obj -triple=riscv64 -g -dwarf-version=5 -mattr=+relax -riscv-uleb128-reloc=1 < %s -o %t - # RUN: llvm-dwarfdump -eh-frame -debug-line -debug-rnglists -v %t | FileCheck %s - # RUN: llvm-readobj -r -x .eh_frame %t | FileCheck %s --check-prefix=RELOC - -@@ -48,9 +48,10 @@ - # RELOC-NEXT: 0x34 R_RISCV_32_PCREL 0x0 - # RELOC-NEXT: } - --## TODO A section needs two relocations. - # RELOC: Section ([[#]]) .rela.debug_rnglists { - # RELOC-NEXT: 0xD R_RISCV_64 .text.foo 0x0 -+# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 0x0 -+# RELOC-NEXT: 0x15 R_RISCV_SUB_ULEB128 .text.foo 0x0 - # RELOC-NEXT: 0x17 R_RISCV_64 .text.bar 0x0 - # RELOC-NEXT: } - -diff --git a/llvm/test/MC/LoongArch/Relocations/leb128.s b/llvm/test/MC/LoongArch/Relocations/leb128.s -new file mode 100644 -index 000000000000..7a96ec551b76 ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/leb128.s -@@ -0,0 +1,72 @@ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t -+# RUN: llvm-readobj -r -x .alloc_w %t | FileCheck --check-prefixes=CHECK,NORELAX %s -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax -+# RUN: llvm-readobj -r -x .alloc_w %t.relax | FileCheck --check-prefixes=CHECK,RELAX %s -+ -+# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax --defsym ERR=1 %s -o /dev/null 2>&1 | \ -+# RUN: FileCheck %s --check-prefix=ERR -+# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax --defsym ERR=1 %s -o /dev/null 2>&1 | \ -+# RUN: FileCheck %s --check-prefix=ERR -+ -+# CHECK: Relocations [ -+# CHECK-NEXT: .rela.alloc_w { -+# RELAX-NEXT: 0x0 R_LARCH_ADD_ULEB128 w1 0x0 -+# RELAX-NEXT: 0x0 R_LARCH_SUB_ULEB128 w 0x0 -+# RELAX-NEXT: 0x1 R_LARCH_ADD_ULEB128 w2 0x0 -+# RELAX-NEXT: 0x1 R_LARCH_SUB_ULEB128 w1 0x0 -+# CHECK-NEXT: 0x2 R_LARCH_PCALA_HI20 foo 0x0 -+# RELAX-NEXT: 0x2 R_LARCH_RELAX - 0x0 -+# CHECK-NEXT: 0x6 R_LARCH_PCALA_LO12 foo 0x0 -+# RELAX-NEXT: 0x6 R_LARCH_RELAX - 0x0 -+# RELAX-NEXT: 0xA R_LARCH_ADD_ULEB128 w2 0x0 -+# RELAX-NEXT: 0xA R_LARCH_SUB_ULEB128 w1 0x0 -+# RELAX-NEXT: 0xB R_LARCH_ADD_ULEB128 w2 0x78 -+# RELAX-NEXT: 0xB R_LARCH_SUB_ULEB128 w1 0x0 -+# RELAX-NEXT: 0xD R_LARCH_ADD_ULEB128 w1 0x0 -+# RELAX-NEXT: 0xD R_LARCH_SUB_ULEB128 w2 0x0 -+# RELAX-NEXT: 0x17 R_LARCH_ADD_ULEB128 w3 0x6F -+# RELAX-NEXT: 0x17 R_LARCH_SUB_ULEB128 w2 0x0 -+# RELAX-NEXT: 0x18 R_LARCH_ADD_ULEB128 w3 0x71 -+# RELAX-NEXT: 0x18 R_LARCH_SUB_ULEB128 w2 0x0 -+# CHECK-NEXT: } -+# CHECK-NEXT: ] -+ -+# CHECK: Hex dump of section '.alloc_w': -+# NORELAX-NEXT: 0x00000000 02080c00 001a8c01 c0020880 01f8ffff -+# NORELAX-NEXT: 0x00000010 ffffffff ffff017f 8101 -+# RELAX-NEXT: 0x00000000 00000c00 001a8c01 c0020080 00808080 -+# RELAX-NEXT: 0x00000010 80808080 80800000 8000 -+ -+.section .alloc_w,"ax",@progbits; w: -+.uleb128 w1-w # w1 is later defined in the same section -+.uleb128 w2-w1 # w1 and w2 are separated by a linker relaxable instruction -+w1: -+ la.pcrel $t0, foo -+w2: -+.uleb128 w2-w1 # 0x08 -+.uleb128 w2-w1+120 # 0x0180 -+.uleb128 -(w2-w1) # 0x01fffffffffffffffff8 -+.uleb128 w3-w2+111 # 0x7f -+.uleb128 w3-w2+113 # 0x0181 -+w3: -+ -+.ifdef ERR -+# ERR: :[[#@LINE+1]]:16: error: .uleb128 expression is not absolute -+.uleb128 extern-w # extern is undefined -+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute -+.uleb128 w-extern -+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute -+.uleb128 x-w # x is later defined in another section -+ -+.section .alloc_x,"aw",@progbits; x: -+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute -+.uleb128 y-x -+.section .alloc_y,"aw",@progbits; y: -+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute -+.uleb128 x-y -+ -+# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute -+.uleb128 extern -+# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute -+.uleb128 y -+.endif -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -index 14922657ae89..cd01332afd0b 100644 ---- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -@@ -8,12 +8,23 @@ - # NORELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .text 0x0 - # NORELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .text 0x0 - # NORELAX-NEXT: } -+# NORELAX-NEXT: Section ({{.*}}) .rela.data { -+# NORELAX-NEXT: 0x30 R_LARCH_ADD8 foo 0x0 -+# NORELAX-NEXT: 0x30 R_LARCH_SUB8 .text 0x10 -+# NORELAX-NEXT: 0x31 R_LARCH_ADD16 foo 0x0 -+# NORELAX-NEXT: 0x31 R_LARCH_SUB16 .text 0x10 -+# NORELAX-NEXT: 0x33 R_LARCH_ADD32 foo 0x0 -+# NORELAX-NEXT: 0x33 R_LARCH_SUB32 .text 0x10 -+# NORELAX-NEXT: 0x37 R_LARCH_ADD64 foo 0x0 -+# NORELAX-NEXT: 0x37 R_LARCH_SUB64 .text 0x10 -+# NORELAX-NEXT: } - # NORELAX-NEXT: ] - - # NORELAX: Hex dump of section '.data': --# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c --# NORELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000808 --# NORELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 -+# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 -+# NORELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c -+# NORELAX-NEXT: 0x00000020 08080008 00000008 00000000 00000008 -+# NORELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 - - # RELAX: Relocations [ - # RELAX-NEXT: Section ({{.*}}) .rela.text { -@@ -23,21 +34,32 @@ - # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: Section ({{.*}}) .rela.data { --# RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 --# RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 --# RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 --# RELAX-NEXT: 0x1F R_LARCH_SUB16 .L3 0x0 --# RELAX-NEXT: 0x21 R_LARCH_ADD32 .L4 0x0 --# RELAX-NEXT: 0x21 R_LARCH_SUB32 .L3 0x0 --# RELAX-NEXT: 0x25 R_LARCH_ADD64 .L4 0x0 --# RELAX-NEXT: 0x25 R_LARCH_SUB64 .L3 0x0 -+# RELAX-NEXT: 0x20 R_LARCH_ADD8 .L4 0x0 -+# RELAX-NEXT: 0x20 R_LARCH_SUB8 .L3 0x0 -+# RELAX-NEXT: 0x21 R_LARCH_ADD16 .L4 0x0 -+# RELAX-NEXT: 0x21 R_LARCH_SUB16 .L3 0x0 -+# RELAX-NEXT: 0x23 R_LARCH_ADD32 .L4 0x0 -+# RELAX-NEXT: 0x23 R_LARCH_SUB32 .L3 0x0 -+# RELAX-NEXT: 0x27 R_LARCH_ADD64 .L4 0x0 -+# RELAX-NEXT: 0x27 R_LARCH_SUB64 .L3 0x0 -+# RELAX-NEXT: 0x2F R_LARCH_ADD_ULEB128 .L4 0x0 -+# RELAX-NEXT: 0x2F R_LARCH_SUB_ULEB128 .L3 0x0 -+# RELAX-NEXT: 0x30 R_LARCH_ADD8 foo 0x0 -+# RELAX-NEXT: 0x30 R_LARCH_SUB8 .L3 0x0 -+# RELAX-NEXT: 0x31 R_LARCH_ADD16 foo 0x0 -+# RELAX-NEXT: 0x31 R_LARCH_SUB16 .L3 0x0 -+# RELAX-NEXT: 0x33 R_LARCH_ADD32 foo 0x0 -+# RELAX-NEXT: 0x33 R_LARCH_SUB32 .L3 0x0 -+# RELAX-NEXT: 0x37 R_LARCH_ADD64 foo 0x0 -+# RELAX-NEXT: 0x37 R_LARCH_SUB64 .L3 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: ] - - # RELAX: Hex dump of section '.data': --# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c --# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000 --# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 -+# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 -+# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c -+# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000 -+# RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 - - .text - .L1: -@@ -55,13 +77,20 @@ - .short .L2 - .L1 - .word .L2 - .L1 - .dword .L2 - .L1 -+.uleb128 .L2 - .L1 - ## TODO Handle alignment directive. - .byte .L3 - .L2 - .short .L3 - .L2 - .word .L3 - .L2 - .dword .L3 - .L2 -+.uleb128 .L3 - .L2 - ## With relaxation, emit relocs because the la.pcrel makes the diff variable. - .byte .L4 - .L3 - .short .L4 - .L3 - .word .L4 - .L3 - .dword .L4 - .L3 -+.uleb128 .L4 - .L3 -+.byte foo - .L3 -+.short foo - .L3 -+.word foo - .L3 -+.dword foo - .L3 -diff --git a/llvm/test/MC/X86/invalid-sleb.s b/llvm/test/MC/X86/invalid-sleb.s -deleted file mode 100644 -index 7d7df351ce4e..000000000000 ---- a/llvm/test/MC/X86/invalid-sleb.s -+++ /dev/null -@@ -1,5 +0,0 @@ --// RUN: not --crash llvm-mc -filetype=obj -triple x86_64-pc-linux %s -o %t 2>&1 | FileCheck %s -- --// CHECK: sleb128 and uleb128 expressions must be absolute -- -- .sleb128 undefined --- -2.20.1 - diff --git a/0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch b/0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch deleted file mode 100644 index 4d19f8cca77d5179068b4f3a08f077fae67f237e..0000000000000000000000000000000000000000 --- a/0006-Backport-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-to-handle-the-mutable-label-diff-in-dwarfinfo.patch +++ /dev/null @@ -1,376 +0,0 @@ -From 286c92a8e78c4b67368c2f47a8e73036fdacbae2 Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Tue, 16 Jan 2024 13:20:13 +0800 -Subject: [PATCH 07/14] [LoongArch] Add relaxDwarfLineAddr and relaxDwarfCFA to - handle the mutable label diff in dwarfinfo (#77728) - -When linker-relaxation is enabled, part of the label diff in dwarfinfo -cannot be computed before static link. Refer to RISCV, we add the -relaxDwarfLineAddr and relaxDwarfCFA to add relocations for these label -diffs. Calculate whether the label diff is mutable. For immutable label -diff, return false and do the other works by its parent function. - -(cherry picked from commit ed7f4edc19ada006789318a0929b57d1b5a761bd) -Change-Id: Iae5bad958c6d1a71dac1672f5f03991eaeea6d22 ---- - llvm/lib/Object/RelocationResolver.cpp | 12 +- - .../MCTargetDesc/LoongArchAsmBackend.cpp | 129 ++++++++++++++++++ - .../MCTargetDesc/LoongArchAsmBackend.h | 5 + - .../LoongArch/dwarf-loongarch-relocs.ll | 128 +++++++++++++++++ - llvm/test/DebugInfo/LoongArch/lit.local.cfg | 2 + - 5 files changed, 274 insertions(+), 2 deletions(-) - create mode 100644 llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll - create mode 100644 llvm/test/DebugInfo/LoongArch/lit.local.cfg - -diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp -index 03ac59289528..0e5036d7dfcc 100644 ---- a/llvm/lib/Object/RelocationResolver.cpp -+++ b/llvm/lib/Object/RelocationResolver.cpp -@@ -539,6 +539,8 @@ static bool supportsLoongArch(uint64_t Type) { - case ELF::R_LARCH_32: - case ELF::R_LARCH_32_PCREL: - case ELF::R_LARCH_64: -+ case ELF::R_LARCH_ADD6: -+ case ELF::R_LARCH_SUB6: - case ELF::R_LARCH_ADD8: - case ELF::R_LARCH_SUB8: - case ELF::R_LARCH_ADD16: -@@ -564,6 +566,10 @@ static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, - return (S + Addend - Offset) & 0xFFFFFFFF; - case ELF::R_LARCH_64: - return S + Addend; -+ case ELF::R_LARCH_ADD6: -+ return (LocData & 0xC0) | ((LocData + S + Addend) & 0x3F); -+ case ELF::R_LARCH_SUB6: -+ return (LocData & 0xC0) | ((LocData - (S + Addend)) & 0x3F); - case ELF::R_LARCH_ADD8: - return (LocData + (S + Addend)) & 0xFF; - case ELF::R_LARCH_SUB8: -@@ -880,8 +886,10 @@ uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R, - - if (GetRelSectionType() == ELF::SHT_RELA) { - Addend = getELFAddend(R); -- // RISCV relocations use both LocData and Addend. -- if (Obj->getArch() != Triple::riscv32 && -+ // LoongArch and RISCV relocations use both LocData and Addend. -+ if (Obj->getArch() != Triple::loongarch32 && -+ Obj->getArch() != Triple::loongarch64 && -+ Obj->getArch() != Triple::riscv32 && - Obj->getArch() != Triple::riscv64) - LocData = 0; - } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index 9227d4d6afed..8d82327b2e2b 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -12,6 +12,7 @@ - - #include "LoongArchAsmBackend.h" - #include "LoongArchFixupKinds.h" -+#include "llvm/MC/MCAsmInfo.h" - #include "llvm/MC/MCAsmLayout.h" - #include "llvm/MC/MCAssembler.h" - #include "llvm/MC/MCContext.h" -@@ -19,6 +20,7 @@ - #include "llvm/MC/MCValue.h" - #include "llvm/Support/Endian.h" - #include "llvm/Support/EndianStream.h" -+#include "llvm/Support/LEB128.h" - - #define DEBUG_TYPE "loongarch-asmbackend" - -@@ -235,6 +237,133 @@ std::pair LoongArchAsmBackend::relaxLEB128(MCLEBFragment &LF, - return std::make_pair(true, true); - } - -+bool LoongArchAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, -+ MCAsmLayout &Layout, -+ bool &WasRelaxed) const { -+ MCContext &C = Layout.getAssembler().getContext(); -+ -+ int64_t LineDelta = DF.getLineDelta(); -+ const MCExpr &AddrDelta = DF.getAddrDelta(); -+ SmallVectorImpl &Data = DF.getContents(); -+ SmallVectorImpl &Fixups = DF.getFixups(); -+ size_t OldSize = Data.size(); -+ -+ int64_t Value; -+ if (AddrDelta.evaluateAsAbsolute(Value, Layout)) -+ return false; -+ bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, Layout); -+ assert(IsAbsolute && "CFA with invalid expression"); -+ (void)IsAbsolute; -+ -+ Data.clear(); -+ Fixups.clear(); -+ raw_svector_ostream OS(Data); -+ -+ // INT64_MAX is a signal that this is actually a DW_LNE_end_sequence. -+ if (LineDelta != INT64_MAX) { -+ OS << uint8_t(dwarf::DW_LNS_advance_line); -+ encodeSLEB128(LineDelta, OS); -+ } -+ -+ unsigned Offset; -+ std::pair FK; -+ -+ // According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode -+ // takes a single unsigned half (unencoded) operand. The maximum encodable -+ // value is therefore 65535. Set a conservative upper bound for relaxation. -+ if (Value > 60000) { -+ unsigned PtrSize = C.getAsmInfo()->getCodePointerSize(); -+ -+ OS << uint8_t(dwarf::DW_LNS_extended_op); -+ encodeULEB128(PtrSize + 1, OS); -+ -+ OS << uint8_t(dwarf::DW_LNE_set_address); -+ Offset = OS.tell(); -+ assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size"); -+ FK = getRelocPairForSize(PtrSize == 4 ? 32 : 64); -+ OS.write_zeros(PtrSize); -+ } else { -+ OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); -+ Offset = OS.tell(); -+ FK = getRelocPairForSize(16); -+ support::endian::write(OS, 0, support::little); -+ } -+ -+ const MCBinaryExpr &MBE = cast(AddrDelta); -+ Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK))); -+ Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK))); -+ -+ if (LineDelta == INT64_MAX) { -+ OS << uint8_t(dwarf::DW_LNS_extended_op); -+ OS << uint8_t(1); -+ OS << uint8_t(dwarf::DW_LNE_end_sequence); -+ } else { -+ OS << uint8_t(dwarf::DW_LNS_copy); -+ } -+ -+ WasRelaxed = OldSize != Data.size(); -+ return true; -+} -+ -+bool LoongArchAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, -+ MCAsmLayout &Layout, -+ bool &WasRelaxed) const { -+ const MCExpr &AddrDelta = DF.getAddrDelta(); -+ SmallVectorImpl &Data = DF.getContents(); -+ SmallVectorImpl &Fixups = DF.getFixups(); -+ size_t OldSize = Data.size(); -+ -+ int64_t Value; -+ if (AddrDelta.evaluateAsAbsolute(Value, Layout)) -+ return false; -+ bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, Layout); -+ assert(IsAbsolute && "CFA with invalid expression"); -+ (void)IsAbsolute; -+ -+ Data.clear(); -+ Fixups.clear(); -+ raw_svector_ostream OS(Data); -+ -+ assert( -+ Layout.getAssembler().getContext().getAsmInfo()->getMinInstAlignment() == -+ 1 && -+ "expected 1-byte alignment"); -+ if (Value == 0) { -+ WasRelaxed = OldSize != Data.size(); -+ return true; -+ } -+ -+ auto AddFixups = [&Fixups, -+ &AddrDelta](unsigned Offset, -+ std::pair FK) { -+ const MCBinaryExpr &MBE = cast(AddrDelta); -+ Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK))); -+ Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK))); -+ }; -+ -+ if (isUIntN(6, Value)) { -+ OS << uint8_t(dwarf::DW_CFA_advance_loc); -+ AddFixups(0, getRelocPairForSize(6)); -+ } else if (isUInt<8>(Value)) { -+ OS << uint8_t(dwarf::DW_CFA_advance_loc1); -+ support::endian::write(OS, 0, support::little); -+ AddFixups(1, getRelocPairForSize(8)); -+ } else if (isUInt<16>(Value)) { -+ OS << uint8_t(dwarf::DW_CFA_advance_loc2); -+ support::endian::write(OS, 0, support::little); -+ AddFixups(1, getRelocPairForSize(16)); -+ } else if (isUInt<32>(Value)) { -+ OS << uint8_t(dwarf::DW_CFA_advance_loc4); -+ support::endian::write(OS, 0, support::little); -+ AddFixups(1, getRelocPairForSize(32)); -+ } else { -+ llvm_unreachable("unsupported CFA encoding"); -+ } -+ -+ WasRelaxed = OldSize != Data.size(); -+ return true; -+} -+ - bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const { - // We mostly follow binutils' convention here: align to 4-byte boundary with a -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -index 49801e4fd81a..657f5ca5e731 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -@@ -68,6 +68,11 @@ public: - std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, - int64_t &Value) const override; - -+ bool relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, MCAsmLayout &Layout, -+ bool &WasRelaxed) const override; -+ bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, -+ bool &WasRelaxed) const override; -+ - bool writeNopData(raw_ostream &OS, uint64_t Count, - const MCSubtargetInfo *STI) const override; - -diff --git a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll -new file mode 100644 -index 000000000000..e03b4c1d34de ---- /dev/null -+++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll -@@ -0,0 +1,128 @@ -+; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=-relax %s -o %t.o -+; RUN: llvm-readobj -r %t.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-NORL %s -+; RUN: llvm-objdump --source %t.o | FileCheck --check-prefix=SOURCE %s -+; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefix=DWARF %s -+ -+; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=+relax %s -o %t.r.o -+; RUN: llvm-readobj -r %t.r.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-ENRL %s -+; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefix=SOURCE %s -+; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefix=DWARF %s -+ -+; RELOCS-BOTH: Relocations [ -+; RELOCS-BOTH-NEXT: Section ({{.*}}) .rela.text { -+; RELOCS-BOTH-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 -+; RELOCS-ENRL-NEXT: 0x14 R_LARCH_RELAX - 0x0 -+; RELOCS-BOTH-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 -+; RELOCS-ENRL-NEXT: 0x18 R_LARCH_RELAX - 0x0 -+; RELOCS-BOTH-NEXT: } -+; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { -+; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 -+; RELOCS-NORL-NEXT: 0x20 R_LARCH_64 .text 0x0 -+; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 0x0 -+; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 0x0 -+; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 0x0 -+; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 0x0 -+; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 0x0 -+; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 0x0 -+; RELOCS-BOTH-NEXT: } -+; RELOCS-BOTH: Section ({{.*}}) .rela.debug_line { -+; RELOCS-BOTH-NEXT: 0x22 R_LARCH_32 .debug_line_str 0x0 -+; RELOCS-BOTH-NEXT: 0x31 R_LARCH_32 .debug_line_str 0x2 -+; RELOCS-BOTH-NEXT: 0x46 R_LARCH_32 .debug_line_str 0x1B -+; RELOCS-NORL-NEXT: 0x4F R_LARCH_64 .text 0x0 -+; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 0x0 -+; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 0x0 -+; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 0x0 -+; RELOCS-BOTH-NEXT: } -+; RELOCS-BOTH-NEXT: ] -+ -+; SOURCE: 0000000000000000 : -+; SOURCE: ; { -+; SOURCE: ; asm volatile( -+; SOURCE: ; return 0; -+ -+; DWARF: DW_AT_producer ("clang") -+; DWARF: DW_AT_name ("dwarf-loongarch-relocs.c") -+; DWARF: DW_AT_comp_dir (".") -+; DWARF: DW_AT_name ("foo") -+; DWARF-NEXT: DW_AT_decl_file ("{{.*}}dwarf-loongarch-relocs.c") -+; DWARF-NEXT: DW_AT_decl_line (1) -+; DWARF-NEXT: DW_AT_type (0x00000032 "int") -+; DWARF: DW_AT_name ("int") -+; DWARF-NEXT: DW_AT_encoding (DW_ATE_signed) -+; DWARF-NEXT: DW_AT_byte_size (0x04) -+; DWARF: .debug_line contents: -+; DWARF-NEXT: debug_line[0x00000000] -+; DWARF-NEXT: Line table prologue: -+; DWARF-NEXT: total_length: {{.*}} -+; DWARF-NEXT: format: DWARF32 -+; DWARF-NEXT: version: 5 -+; DWARF-NEXT: address_size: 8 -+; DWARF-NEXT: seg_select_size: 0 -+; DWARF-NEXT: prologue_length: 0x0000003e -+; DWARF-NEXT: min_inst_length: 1 -+; DWARF-NEXT: max_ops_per_inst: 1 -+; DWARF-NEXT: default_is_stmt: 1 -+; DWARF-NEXT: line_base: -5 -+; DWARF-NEXT: line_range: 14 -+; DWARF-NEXT: opcode_base: 13 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_copy] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_pc] = 1 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_line] = 1 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_file] = 1 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_column] = 1 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_negate_stmt] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_basic_block] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_const_add_pc] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0 -+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_isa] = 1 -+; DWARF-NEXT: include_directories[ 0] = "." -+; DWARF-NEXT: file_names[ 0]: -+; DWARF-NEXT: name: "dwarf-loongarch-relocs.c" -+; DWARF-NEXT: dir_index: 0 -+; DWARF-NEXT: md5_checksum: f44d6d71bc4da58b4abe338ca507c007 -+; DWARF-NEXT: source: "{{.*}}" -+; DWARF-EMPTY: -+; DWARF-NEXT: Address Line Column File ISA Discriminator OpIndex Flags -+; DWARF-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- -+; DWARF-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt -+; DWARF-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end -+; DWARF-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt -+; DWARF-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin -+; DWARF-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence -+ -+; ModuleID = 'dwarf-loongarch-relocs.c' -+source_filename = "dwarf-loongarch-relocs.c" -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64" -+ -+; Function Attrs: noinline nounwind optnone -+define dso_local signext i32 @foo() #0 !dbg !8 { -+ call void asm sideeffect ".cfi_remember_state\0A\09.cfi_adjust_cfa_offset 16\0A\09nop\0A\09la.pcrel $$t0, sym\0A\09nop\0A\09.cfi_restore_state\0A\09", ""() #1, !dbg !12, !srcloc !13 -+ ret i32 0, !dbg !14 -+} -+ -+attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="loongarch64" "target-features"="+64bit,+d,+f,+ual" } -+attributes #1 = { nounwind } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!2, !3, !4, !5, !6} -+!llvm.ident = !{!7} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "dwarf-loongarch-relocs.c", directory: ".", checksumkind: CSK_MD5, checksum: "f44d6d71bc4da58b4abe338ca507c007", source: "int foo()\0A{\0A asm volatile(\0A \22.cfi_remember_state\\n\\t\22\0A \22.cfi_adjust_cfa_offset 16\\n\\t\22\0A \22nop\\n\\t\22\0A \22la.pcrel $t0, sym\\n\\t\22\0A \22nop\\n\\t\22\0A \22.cfi_restore_state\\n\\t\22);\0A return 0;\0A}\0A") -+!2 = !{i32 7, !"Dwarf Version", i32 5} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 7, !"direct-access-external-data", i32 0} -+!6 = !{i32 7, !"frame-pointer", i32 2} -+!7 = !{!"clang"} -+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0) -+!9 = !DISubroutineType(types: !10) -+!10 = !{!11} -+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+!12 = !DILocation(line: 3, column: 3, scope: !8) -+!13 = !{i64 34, i64 56, i64 92, i64 106, i64 134, i64 148, i64 177} -+!14 = !DILocation(line: 10, column: 3, scope: !8) -diff --git a/llvm/test/DebugInfo/LoongArch/lit.local.cfg b/llvm/test/DebugInfo/LoongArch/lit.local.cfg -new file mode 100644 -index 000000000000..77becb8eee90 ---- /dev/null -+++ b/llvm/test/DebugInfo/LoongArch/lit.local.cfg -@@ -0,0 +1,2 @@ -+if "LoongArch" not in config.root.targets: -+ config.unsupported = True --- -2.20.1 - diff --git a/0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch b/0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch deleted file mode 100644 index 9d027af8cbb5d5309a467fddd80c13fe1c840a78..0000000000000000000000000000000000000000 --- a/0007-Backport-LoongArch-Insert-nops-and-emit-align-reloc-when-handle-alignment-directive.patch +++ /dev/null @@ -1,362 +0,0 @@ -From 87f6adc2acf635a0a4c294217fb54c55eee3a06c Mon Sep 17 00:00:00 2001 -From: Jinyang He -Date: Wed, 24 Jan 2024 09:17:49 +0800 -Subject: [PATCH 08/14] [LoongArch] Insert nops and emit align reloc when - handle alignment directive (#72962) - -Refer to RISCV, we will fix up the alignment if linker relaxation -changes code size and breaks alignment. Insert enough Nops and emit -R_LARCH_ALIGN relocation type so that linker could satisfy the alignment -by removing Nops. -It does so only in sections with the SHF_EXECINSTR flag. - -In LoongArch psABI v2.30, R_LARCH_ALIGN requires symbol index. The -lowest 8 bits of addend represent alignment and the other bits of addend -represent the maximum number of bytes to emit. - -(cherry picked from commit c51ab483e6c2d991a01179584705b83fbea1940d) -Change-Id: Iba30702c9dda378acfae0b1f1134926fa838a368 ---- - llvm/lib/MC/MCExpr.cpp | 2 +- - .../MCTargetDesc/LoongArchAsmBackend.cpp | 67 ++++++++++++++++ - .../MCTargetDesc/LoongArchAsmBackend.h | 15 ++++ - .../MCTargetDesc/LoongArchFixupKinds.h | 4 +- - .../Relocations/align-non-executable.s | 27 +++++++ - .../MC/LoongArch/Relocations/relax-addsub.s | 15 +++- - .../MC/LoongArch/Relocations/relax-align.s | 79 +++++++++++++++++++ - 7 files changed, 205 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/MC/LoongArch/Relocations/align-non-executable.s - create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-align.s - -diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp -index a561fed11179..79808a58d81c 100644 ---- a/llvm/lib/MC/MCExpr.cpp -+++ b/llvm/lib/MC/MCExpr.cpp -@@ -711,7 +711,7 @@ static void AttemptToFoldSymbolOffsetDifference( - if (DF) { - Displacement += DF->getContents().size(); - } else if (auto *AF = dyn_cast(FI); -- AF && Layout && -+ AF && Layout && AF->hasEmitNops() && - !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( - *AF, Count)) { - Displacement += Asm->computeFragmentSize(*Layout, *AF); -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index 8d82327b2e2b..8c482356402f 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -17,10 +17,13 @@ - #include "llvm/MC/MCAssembler.h" - #include "llvm/MC/MCContext.h" - #include "llvm/MC/MCELFObjectWriter.h" -+#include "llvm/MC/MCExpr.h" -+#include "llvm/MC/MCSection.h" - #include "llvm/MC/MCValue.h" - #include "llvm/Support/Endian.h" - #include "llvm/Support/EndianStream.h" - #include "llvm/Support/LEB128.h" -+#include "llvm/Support/MathExtras.h" - - #define DEBUG_TYPE "loongarch-asmbackend" - -@@ -177,6 +180,70 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, - } - } - -+// Linker relaxation may change code size. We have to insert Nops -+// for .align directive when linker relaxation enabled. So then Linker -+// could satisfy alignment by removing Nops. -+// The function returns the total Nops Size we need to insert. -+bool LoongArchAsmBackend::shouldInsertExtraNopBytesForCodeAlign( -+ const MCAlignFragment &AF, unsigned &Size) { -+ // Calculate Nops Size only when linker relaxation enabled. -+ if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax)) -+ return false; -+ -+ // Ignore alignment if MaxBytesToEmit is less than the minimum Nop size. -+ const unsigned MinNopLen = 4; -+ if (AF.getMaxBytesToEmit() < MinNopLen) -+ return false; -+ Size = AF.getAlignment().value() - MinNopLen; -+ return AF.getAlignment() > MinNopLen; -+} -+ -+// We need to insert R_LARCH_ALIGN relocation type to indicate the -+// position of Nops and the total bytes of the Nops have been inserted -+// when linker relaxation enabled. -+// The function inserts fixup_loongarch_align fixup which eventually will -+// transfer to R_LARCH_ALIGN relocation type. -+// The improved R_LARCH_ALIGN requires symbol index. The lowest 8 bits of -+// addend represent alignment and the other bits of addend represent the -+// maximum number of bytes to emit. The maximum number of bytes is zero -+// means ignore the emit limit. -+bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign( -+ MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) { -+ // Insert the fixup only when linker relaxation enabled. -+ if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax)) -+ return false; -+ -+ // Calculate total Nops we need to insert. If there are none to insert -+ // then simply return. -+ unsigned Count; -+ if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count)) -+ return false; -+ -+ MCSection *Sec = AF.getParent(); -+ MCContext &Ctx = Asm.getContext(); -+ const MCExpr *Dummy = MCConstantExpr::create(0, Ctx); -+ // Create fixup_loongarch_align fixup. -+ MCFixup Fixup = -+ MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align)); -+ const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec]; -+ if (MCSym == nullptr) { -+ // Create a symbol and make the value of symbol is zero. -+ MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align"); -+ Sym->setFragment(&*Sec->getBeginSymbol()->getFragment()); -+ Asm.registerSymbol(*Sym); -+ MCSym = MCSymbolRefExpr::create(Sym, Ctx); -+ getSecToAlignSym()[Sec] = MCSym; -+ } -+ -+ uint64_t FixedValue = 0; -+ unsigned Lo = Log2_64(Count) + 1; -+ unsigned Hi = AF.getMaxBytesToEmit() >= Count ? 0 : AF.getMaxBytesToEmit(); -+ MCValue Value = MCValue::get(MCSym, nullptr, Hi << 8 | Lo); -+ Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, Value, FixedValue); -+ -+ return true; -+} -+ - bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCValue &Target) { -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -index 657f5ca5e731..71bbd003888a 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -@@ -17,7 +17,9 @@ - #include "MCTargetDesc/LoongArchFixupKinds.h" - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "llvm/MC/MCAsmBackend.h" -+#include "llvm/MC/MCExpr.h" - #include "llvm/MC/MCFixupKindInfo.h" -+#include "llvm/MC/MCSection.h" - #include "llvm/MC/MCSubtargetInfo.h" - - namespace llvm { -@@ -27,6 +29,7 @@ class LoongArchAsmBackend : public MCAsmBackend { - uint8_t OSABI; - bool Is64Bit; - const MCTargetOptions &TargetOptions; -+ DenseMap SecToAlignSym; - - public: - LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, -@@ -45,6 +48,15 @@ public: - uint64_t Value, bool IsResolved, - const MCSubtargetInfo *STI) const override; - -+ // Return Size with extra Nop Bytes for alignment directive in code section. -+ bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, -+ unsigned &Size) override; -+ -+ // Insert target specific fixup type for alignment directive in code section. -+ bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, -+ const MCAsmLayout &Layout, -+ MCAlignFragment &AF) override; -+ - bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target) override; - -@@ -79,6 +91,9 @@ public: - std::unique_ptr - createObjectTargetWriter() const override; - const MCTargetOptions &getTargetOptions() const { return TargetOptions; } -+ DenseMap &getSecToAlignSym() { -+ return SecToAlignSym; -+ } - }; - } // end namespace llvm - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -index 178fa6e5262b..78414408f21f 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h -@@ -108,7 +108,9 @@ enum Fixups { - // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. - fixup_loongarch_tls_gd_hi20, - // Generate an R_LARCH_RELAX which indicates the linker may relax here. -- fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX -+ fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, -+ // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. -+ fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, - }; - } // end namespace LoongArch - } // end namespace llvm -diff --git a/llvm/test/MC/LoongArch/Relocations/align-non-executable.s b/llvm/test/MC/LoongArch/Relocations/align-non-executable.s -new file mode 100644 -index 000000000000..47834acd9521 ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/align-non-executable.s -@@ -0,0 +1,27 @@ -+## A label difference separated by an alignment directive, when the -+## referenced symbols are in a non-executable section with instructions, -+## should generate ADD/SUB relocations. -+## https://github.com/llvm/llvm-project/pull/76552 -+ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ -+# RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK,RELAX %s -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ -+# RUN: | llvm-readobj -r - | FileCheck %s -+ -+.section ".dummy", "a" -+.L1: -+ la.pcrel $t0, sym -+.p2align 3 -+.L2: -+.dword .L2 - .L1 -+ -+# CHECK: Relocations [ -+# CHECK-NEXT: Section ({{.*}}) .rela.dummy { -+# CHECK-NEXT: 0x0 R_LARCH_PCALA_HI20 sym 0x0 -+# RELAX-NEXT: 0x0 R_LARCH_RELAX - 0x0 -+# CHECK-NEXT: 0x4 R_LARCH_PCALA_LO12 sym 0x0 -+# RELAX-NEXT: 0x4 R_LARCH_RELAX - 0x0 -+# RELAX-NEXT: 0x8 R_LARCH_ADD64 .L2 0x0 -+# RELAX-NEXT: 0x8 R_LARCH_SUB64 .L1 0x0 -+# CHECK-NEXT: } -+# CHECK-NEXT: ] -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -index cd01332afd0b..18e0ede5e293 100644 ---- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s -@@ -28,12 +28,23 @@ - - # RELAX: Relocations [ - # RELAX-NEXT: Section ({{.*}}) .rela.text { -+# RELAX-NEXT: 0x4 R_LARCH_ALIGN {{.*}} 0x4 - # RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 - # RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 - # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 - # RELAX-NEXT: } - # RELAX-NEXT: Section ({{.*}}) .rela.data { -+# RELAX-NEXT: 0x10 R_LARCH_ADD8 .L3 0x0 -+# RELAX-NEXT: 0x10 R_LARCH_SUB8 .L2 0x0 -+# RELAX-NEXT: 0x11 R_LARCH_ADD16 .L3 0x0 -+# RELAX-NEXT: 0x11 R_LARCH_SUB16 .L2 0x0 -+# RELAX-NEXT: 0x13 R_LARCH_ADD32 .L3 0x0 -+# RELAX-NEXT: 0x13 R_LARCH_SUB32 .L2 0x0 -+# RELAX-NEXT: 0x17 R_LARCH_ADD64 .L3 0x0 -+# RELAX-NEXT: 0x17 R_LARCH_SUB64 .L2 0x0 -+# RELAX-NEXT: 0x1F R_LARCH_ADD_ULEB128 .L3 0x0 -+# RELAX-NEXT: 0x1F R_LARCH_SUB_ULEB128 .L2 0x0 - # RELAX-NEXT: 0x20 R_LARCH_ADD8 .L4 0x0 - # RELAX-NEXT: 0x20 R_LARCH_SUB8 .L3 0x0 - # RELAX-NEXT: 0x21 R_LARCH_ADD16 .L4 0x0 -@@ -57,7 +68,7 @@ - - # RELAX: Hex dump of section '.data': - # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 --# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c -+# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 - # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000 - # RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 - -@@ -78,7 +89,7 @@ - .word .L2 - .L1 - .dword .L2 - .L1 - .uleb128 .L2 - .L1 --## TODO Handle alignment directive. -+## With relaxation, emit relocs because the .align makes the diff variable. - .byte .L3 - .L2 - .short .L3 - .L2 - .word .L3 - .L2 -diff --git a/llvm/test/MC/LoongArch/Relocations/relax-align.s b/llvm/test/MC/LoongArch/Relocations/relax-align.s -new file mode 100644 -index 000000000000..294fd9fb916c ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/relax-align.s -@@ -0,0 +1,79 @@ -+## The file testing Nop insertion with R_LARCH_ALIGN for relaxation. -+ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t -+# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=INSTR -+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.r -+# RUN: llvm-objdump -d %t.r | FileCheck %s --check-prefixes=INSTR,RELAX-INSTR -+# RUN: llvm-readobj -r %t.r | FileCheck %s --check-prefixes=RELOC,RELAX-RELOC -+ -+.text -+break 0 -+# INSTR: break 0 -+ -+## Not emit R_LARCH_ALIGN if alignment directive is less than or equal to -+## minimum code alignment(a.k.a 4). -+.p2align 2 -+.p2align 1 -+.p2align 0 -+ -+## Not emit instructions if max emit bytes less than min nop size. -+.p2align 4, , 2 -+ -+## Not emit R_LARCH_ALIGN if alignment directive with specific padding value. -+## The behavior is the same as GNU assembler. -+break 1 -+.p2align 4, 1 -+# INSTR-NEXT: break 1 -+# INSTR-COUNT-2: 01 01 01 01 -+ -+break 2 -+.p2align 4, 1, 12 -+# INSTR-NEXT: break 2 -+# INSTR-COUNT-3: 01 01 01 01 -+ -+break 3 -+.p2align 4 -+# INSTR-NEXT: break 3 -+# INSTR-COUNT-3: nop -+ -+break 4 -+.p2align 5 -+.p2align 4 -+# INSTR-NEXT: break 4 -+# INSTR-COUNT-3: nop -+# RELAX-INSTR-COUNT-7: nop -+ -+break 5 -+.p2align 4, , 11 -+# INSTR-NEXT: break 5 -+# RELAX-INSTR-COUNT-3: nop -+ -+break 6 -+## Not emit the third parameter. -+.p2align 4, , 12 -+# INSTR-NEXT: break 6 -+# INSTR-NEXT: nop -+# INSTR-NEXT: nop -+# RELAX-INSTR-NEXT: nop -+ -+ret -+# INSNR-NEXT: ret -+ -+## Test the symbol index is different from .text. -+.section .text2, "ax" -+.p2align 4 -+break 7 -+ -+# RELOC: Relocations [ -+# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text { -+# RELAX-RELOC-NEXT: 0x24 R_LARCH_ALIGN .Lla-relax-align0 0x4 -+# RELAX-RELOC-NEXT: 0x34 R_LARCH_ALIGN .Lla-relax-align0 0x5 -+# RELAX-RELOC-NEXT: 0x50 R_LARCH_ALIGN .Lla-relax-align0 0x4 -+# RELAX-RELOC-NEXT: 0x60 R_LARCH_ALIGN .Lla-relax-align0 0xB04 -+# RELAX-RELOC-NEXT: 0x70 R_LARCH_ALIGN .Lla-relax-align0 0x4 -+# RELAX-RELOC-NEXT: } -+# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text2 { -+# RELAX-RELOC-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align1 0x4 -+# RELAX-RELOC-NEXT: } -+# RELOC-NEXT: ] --- -2.20.1 - diff --git a/0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch b/0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch deleted file mode 100644 index 4ed67f06967f3de41bd8d7355c307df3d736c2b7..0000000000000000000000000000000000000000 --- a/0008-Backport-test-Update-dwarf-loongarch-relocs.ll.patch +++ /dev/null @@ -1,86 +0,0 @@ -From f51ee6c3468eacc82d3b3f09fcca381178bdc9e7 Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Wed, 24 Jan 2024 11:03:14 +0800 -Subject: [PATCH 11/14] [test] Update dwarf-loongarch-relocs.ll - -Address buildbot faiures: -http://45.33.8.238/macm1/77360/step_11.txt -http://45.33.8.238/linux/128902/step_12.txt - -(cherry picked from commit baba7e4175b6ca21e83b1cf8229f29dbba02e979) -(cherry picked from commit c9e73cdd9a17f15ede120ea57657553f9e105eab) -Change-Id: I00aa1414f556f0ba5ff6bf6a879a6fc1fcfa49e0 ---- - .../LoongArch/dwarf-loongarch-relocs.ll | 37 ++++++++++++------- - 1 file changed, 23 insertions(+), 14 deletions(-) - -diff --git a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll -index e03b4c1d34de..07443a62b933 100644 ---- a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll -+++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll -@@ -1,19 +1,22 @@ - ; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=-relax %s -o %t.o - ; RUN: llvm-readobj -r %t.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-NORL %s --; RUN: llvm-objdump --source %t.o | FileCheck --check-prefix=SOURCE %s --; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefix=DWARF %s -+; RUN: llvm-objdump --source %t.o | FileCheck --check-prefixes=SOURCE,SOURCE-NORL %s -+; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefixes=DWARF,DWARF-NORL %s - - ; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=+relax %s -o %t.r.o - ; RUN: llvm-readobj -r %t.r.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-ENRL %s --; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefix=SOURCE %s --; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefix=DWARF %s -+; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefixes=SOURCE,SOURCE-ENRL %s -+; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefixes=DWARF,DWARF-ENRL %s - - ; RELOCS-BOTH: Relocations [ - ; RELOCS-BOTH-NEXT: Section ({{.*}}) .rela.text { --; RELOCS-BOTH-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 --; RELOCS-ENRL-NEXT: 0x14 R_LARCH_RELAX - 0x0 --; RELOCS-BOTH-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 --; RELOCS-ENRL-NEXT: 0x18 R_LARCH_RELAX - 0x0 -+; RELOCS-NORL-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 -+; RELOCS-NORL-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 -+; RELOCS-ENRL-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align0 0x5 -+; RELOCS-ENRL-NEXT: 0x30 R_LARCH_PCALA_HI20 sym 0x0 -+; RELOCS-ENRL-NEXT: 0x30 R_LARCH_RELAX - 0x0 -+; RELOCS-ENRL-NEXT: 0x34 R_LARCH_PCALA_LO12 sym 0x0 -+; RELOCS-ENRL-NEXT: 0x34 R_LARCH_RELAX - 0x0 - ; RELOCS-BOTH-NEXT: } - ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { - ; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 -@@ -36,7 +39,8 @@ - ; RELOCS-BOTH-NEXT: } - ; RELOCS-BOTH-NEXT: ] - --; SOURCE: 0000000000000000 : -+; SOURCE-NORL: 0000000000000000 : -+; SOURCE-ENRL: 000000000000001c : - ; SOURCE: ; { - ; SOURCE: ; asm volatile( - ; SOURCE: ; return 0; -@@ -87,11 +91,16 @@ - ; DWARF-EMPTY: - ; DWARF-NEXT: Address Line Column File ISA Discriminator OpIndex Flags - ; DWARF-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- --; DWARF-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt --; DWARF-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end --; DWARF-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt --; DWARF-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin --; DWARF-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence -+; DWARF-NORL-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt -+; DWARF-NORL-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end -+; DWARF-NORL-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt -+; DWARF-NORL-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin -+; DWARF-NORL-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence -+; DWARF-ENRL-NEXT: 0x000000000000001c 2 0 0 0 0 0 is_stmt -+; DWARF-ENRL-NEXT: 0x000000000000002c 3 3 0 0 0 0 is_stmt prologue_end -+; DWARF-ENRL-NEXT: 0x000000000000003c 10 3 0 0 0 0 is_stmt -+; DWARF-ENRL-NEXT: 0x0000000000000048 10 3 0 0 0 0 epilogue_begin -+; DWARF-ENRL-NEXT: 0x0000000000000050 10 3 0 0 0 0 end_sequence - - ; ModuleID = 'dwarf-loongarch-relocs.c' - source_filename = "dwarf-loongarch-relocs.c" --- -2.20.1 - diff --git a/0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch b/0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch deleted file mode 100644 index 94bb77207cc44bb661a23445fcd360b1965ea2ce..0000000000000000000000000000000000000000 --- a/0009-Backport-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-private-symbols-in-.uleb128-for-label-differences.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 442b5109ccbabed1110c122c1ca92d4194ba632b Mon Sep 17 00:00:00 2001 -From: Fangrui Song -Date: Wed, 9 Aug 2023 21:42:18 -0700 -Subject: [PATCH 13/14] [MC][test] Change ELF/uleb-ehtable.s Mach-O to use - private symbols in .uleb128 for label differences - -On Mach-O, `.uleb128 A-B` where A and B are separated by a non-private symbol is invalid -(see D153167). - -(cherry picked from commit 0a89bda4a8b756a00985e0965f7686b5ceb43295) -Change-Id: I92ed11d6913b8c781e29be6e8c642cf0a371910d ---- - llvm/test/MC/ELF/uleb-ehtable.s | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/llvm/test/MC/ELF/uleb-ehtable.s b/llvm/test/MC/ELF/uleb-ehtable.s -index ca3f9e97bffc..6407223f36e7 100644 ---- a/llvm/test/MC/ELF/uleb-ehtable.s -+++ b/llvm/test/MC/ELF/uleb-ehtable.s -@@ -1,7 +1,7 @@ - // RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=ELF - // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=ELF --// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO --// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO -+// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 --defsym MACHO=1 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO -+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 --defsym MACHO=1 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO - - // Test that we can assemble a GCC-like EH table that has 16381-16383 bytes of - // non-padding data between .ttbaseref and .ttbase. The assembler must insert -@@ -13,11 +13,20 @@ - foo: - .byte 0xff // LPStart omitted - .byte 0x1 // TType encoding (uleb128) -+.ifdef MACHO -+ .uleb128 Lttbase-Lttbaseref -+Lttbaseref: -+.else - .uleb128 .ttbase-.ttbaseref - .ttbaseref: -+.endif - .fill 128*128-1, 1, 0xcd // call site and actions tables - .balign 4 -+.ifdef MACHO -+Lttbase: -+.else - .ttbase: -+.endif - .byte 1, 2, 3, 4 - - // ELF: Name: .data --- -2.20.1 - diff --git a/0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch b/0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch deleted file mode 100644 index 1d370eef1c56496e8b5d907d81486e7d9b6c3b6c..0000000000000000000000000000000000000000 --- a/0010-Backport-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-isMicroMips-special-case.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 3b777f98a3997f338919af7ff1ef8a6fd07f76a0 Mon Sep 17 00:00:00 2001 -From: Fangrui Song -Date: Wed, 16 Aug 2023 23:11:59 -0700 -Subject: [PATCH 14/14] [Mips][MC] AttemptToFoldSymbolOffsetDifference: revert - isMicroMips special case - -D52985/D57677 added a .gcc_except_table workaround, but the new behavior -doesn't match GNU assembler. -``` -void foo(); -int bar() { - foo(); - try { throw 1; } - catch (int) { return 1; } - return 0; -} - -clang --target=mipsel-linux-gnu -mmicromips -S a.cc -mipsel-linux-gnu-gcc -mmicromips -c a.s -o gnu.o - -.uleb128 ($cst_end0)-($cst_begin0) // bit 0 is not forced to 1 -.uleb128 ($func_begin0)-($func_begin0) // bit 0 is not forced to 1 -``` - -I have inspected `.gcc_except_table` output by `mipsel-linux-gnu-gcc -mmicromips -c a.cc`. -The `.uleb128` values are not forced to set the least significant bit. - -In addition, D57677's adjustment (even->odd) to CodeGen/Mips/micromips-b-range.ll is wrong. -PC-relative `.long func - .` values will differ from GNU assembler as well. - -The original intention of D52985 seems unclear to me. I think whatever -goal it wants to achieve should be moved to an upper layer. - -This isMicroMips special case has caused problems to fix MCAssembler::relaxLEB to use evaluateAsAbsolute instead of evaluateKnownAbsolute, -which is needed to proper support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128. - -Differential Revision: https://reviews.llvm.org/D157655 - -(cherry picked from commit 4c89277095ee7cda3d20e0f5f18b384212069778) -Change-Id: Iedd73e0c61856c30fde442309fc16d4327829f1a ---- - llvm/lib/MC/MCExpr.cpp | 5 ----- - llvm/test/CodeGen/Mips/micromips-b-range.ll | 8 ++++---- - llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll | 2 +- - llvm/test/DebugInfo/Mips/eh_frame.ll | 4 ++-- - 4 files changed, 7 insertions(+), 12 deletions(-) - -diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp -index 79808a58d81c..c9ff1865cf91 100644 ---- a/llvm/lib/MC/MCExpr.cpp -+++ b/llvm/lib/MC/MCExpr.cpp -@@ -611,11 +611,6 @@ static void AttemptToFoldSymbolOffsetDifference( - if (Asm->isThumbFunc(&SA)) - Addend |= 1; - -- // If symbol is labeled as micromips, we set low-bit to ensure -- // correct offset in .gcc_except_table -- if (Asm->getBackend().isMicroMips(&SA)) -- Addend |= 1; -- - // Clear the symbol expr pointers to indicate we have folded these - // operands. - A = B = nullptr; -diff --git a/llvm/test/CodeGen/Mips/micromips-b-range.ll b/llvm/test/CodeGen/Mips/micromips-b-range.ll -index 064afff3da0e..81d1c04208cc 100644 ---- a/llvm/test/CodeGen/Mips/micromips-b-range.ll -+++ b/llvm/test/CodeGen/Mips/micromips-b-range.ll -@@ -13,7 +13,7 @@ - ; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp) - ; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1 - ; CHECK-NEXT: 26: 40 60 00 02 bal 0x2e --; CHECK-NEXT: 2a: 30 21 04 69 addiu $1, $1, 1129 -+; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128 - ; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1 - ; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp) - ; CHECK-NEXT: 36: 00 01 0f 3c jr $1 -@@ -27,7 +27,7 @@ - ; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp) - ; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1 - ; CHECK-NEXT: 5e: 40 60 00 02 bal 0x66 --; CHECK-NEXT: 62: 30 21 04 5d addiu $1, $1, 1117 -+; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116 - ; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1 - ; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp) - ; CHECK-NEXT: 6e: 00 01 0f 3c jr $1 -@@ -39,7 +39,7 @@ - ; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp) - ; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1 - ; CHECK-NEXT: 8e: 40 60 00 02 bal 0x96 --; CHECK-NEXT: 92: 30 21 04 2d addiu $1, $1, 1069 -+; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068 - ; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1 - ; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp) - ; CHECK-NEXT: 9e: 00 01 0f 3c jr $1 -@@ -51,7 +51,7 @@ - ; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 0($sp) - ; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1 - ; CHECK-NEXT: 1047e: 40 60 00 02 bal 0x10486 --; CHECK-NEXT: 10482: 30 21 04 01 addiu $1, $1, 1025 -+; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024 - ; CHECK-NEXT: 10486: 00 3f 09 50 addu $1, $ra, $1 - ; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp) - ; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1 -diff --git a/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll b/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll -index 2b63aff01574..20d64fc216b7 100644 ---- a/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll -+++ b/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll -@@ -1,7 +1,7 @@ - ; RUN: llc -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -O3 -filetype=obj < %s | llvm-objdump -s -j .gcc_except_table - | FileCheck %s - - ; CHECK: Contents of section .gcc_except_table: --; CHECK-NEXT: 0000 ff9b1501 0c011100 00110e1f 011f1800 -+; CHECK-NEXT: 0000 ff9b1501 0c001000 00100e1e 011e1800 - ; CHECK-NEXT: 0010 00010000 00000000 - - @_ZTIi = external constant ptr -diff --git a/llvm/test/DebugInfo/Mips/eh_frame.ll b/llvm/test/DebugInfo/Mips/eh_frame.ll -index 506e5b87892b..60d4dc76777e 100644 ---- a/llvm/test/DebugInfo/Mips/eh_frame.ll -+++ b/llvm/test/DebugInfo/Mips/eh_frame.ll -@@ -26,9 +26,9 @@ - ; CHECK-READELF-PIC-NEXT: R_MIPS_PC32 - ; CHECK-READELF-NEXT: .gcc_except_table - --; EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c011500 00150e23 01231e00 ...........#.#.. -+; EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c001400 00140e22 01221e00 ...........".".. - ; EXCEPT-TABLE-STATIC: 0010 00010000 00000000 --; EXCEPT-TABLE-PIC: 0000 ff9b1501 0c012d00 002d133f 013f2a00 ......-..-.?.?*. -+; EXCEPT-TABLE-PIC: 0000 ff9b1501 0c002c00 002c123e 013e2a00 ......,..,.>.>*. - ; EXCEPT-TABLE-PIC: 0010 00010000 00000000 ........ - - @_ZTIi = external constant ptr --- -2.20.1 - diff --git a/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch b/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch deleted file mode 100644 index 0976d4e394c13743bd87c17e78e67eefdb276df1..0000000000000000000000000000000000000000 --- a/0011-Backport-LoongArch-Add-the-support-for-vector-in-llvm17.patch +++ /dev/null @@ -1,56520 +0,0 @@ -From 6ff32ae0ca7a400249535b19d9ca489b44deae19 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 9 Aug 2023 16:01:37 +0800 -Subject: [PATCH 01/35] [Clang][LoongArch] Use the ClangBuiltin class to - automatically generate support for CBE and CFE - -Fixed the type modifier (L->W), removed redundant feature checking code -since the feature has already been checked in `EmitBuiltinExpr`. And -Cleaned up unused diagnostic information. - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D156866 - -(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 141 ++++++++++---------- - llvm/lib/IR/Function.cpp | 1 + - 2 files changed, 72 insertions(+), 70 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index 5edce3c529e1..4219b2f55346 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -51,74 +51,75 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; - //===----------------------------------------------------------------------===// - // LoongArch BASE - --def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], -- [ImmArg>, ImmArg>]>; --def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -- [ImmArg>, ImmArg>]>; --def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; --def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -- --def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], -- [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], -- [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], -- [llvm_i32_ty, llvm_i32_ty, -- llvm_i32_ty], -- [ImmArg>]>; --def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i64_ty, -- llvm_i32_ty], -- [ImmArg>]>; -- --def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; --def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; -- --def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; --def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; -- --def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -- --def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; --def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; -- --def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], -- [llvm_i64_ty, llvm_i64_ty], -- [ImmArg>]>; --def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], -- [ImmArg>]>; -+class BaseInt ret_types, list param_types, -+ list intr_properties = []> -+ : Intrinsic, -+ ClangBuiltin; -+ -+def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>, ImmArg>]>; -+def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>, ImmArg>]>; -+def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+ -+def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; -+ -+def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty], -+ [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty], -+ [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty], -+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [ImmArg>]>; -+def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty], -+ [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [ImmArg>]>; -+ -+def int_loongarch_iocsrrd_b : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>; -+ -+def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; -+def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>; -+ -+def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; -+ -+def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; -+def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; -+ -+def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>]>; -+def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], -+ [ImmArg>]>; - } // TargetPrefix = "loongarch" -diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp -index 27219e89dc5f..435800d9e5f9 100644 ---- a/llvm/lib/IR/Function.cpp -+++ b/llvm/lib/IR/Function.cpp -@@ -37,6 +37,7 @@ - #include "llvm/IR/IntrinsicsBPF.h" - #include "llvm/IR/IntrinsicsDirectX.h" - #include "llvm/IR/IntrinsicsHexagon.h" -+#include "llvm/IR/IntrinsicsLoongArch.h" - #include "llvm/IR/IntrinsicsMips.h" - #include "llvm/IR/IntrinsicsNVPTX.h" - #include "llvm/IR/IntrinsicsPowerPC.h" --- -2.20.1 - - -From fca9d0a876fb72d3b483044a7616d27a47121512 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 19 Aug 2023 15:58:38 +0800 -Subject: [PATCH 02/35] [LoongArch] Add LSX intrinsic support - -For handling intrinsics, our approach is not simply to match them -one-to-one with instructions. Instead, we lower some intrinsics -to common nodes and then perform matching. The advantage of this -approach is that it allows us to fully utilize the passes available -at the common layer for optimizing purposes. - -We perform error checks on the immediate operand of all intrinsics, -rather than waiting until the end to throw exceptions. - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155829 - -(cherry picked from commit 53141b2fcfa20616970833e6513537d211116c05) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 524 ++++++++++ - .../LoongArch/LoongArchISelDAGToDAG.cpp | 100 +- - .../Target/LoongArch/LoongArchISelDAGToDAG.h | 8 + - .../LoongArch/LoongArchISelLowering.cpp | 902 +++++++++++++++++- - .../Target/LoongArch/LoongArchISelLowering.h | 14 + - .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + - .../Target/LoongArch/LoongArchInstrInfo.td | 6 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 816 ++++++++++++++++ - 8 files changed, 2359 insertions(+), 23 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index 4219b2f55346..d39d8261ebe3 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -123,3 +123,527 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], - [ImmArg>]>; - } // TargetPrefix = "loongarch" -+ -+/// Vector intrinsic -+ -+class VecInt ret_types, list param_types, -+ list intr_properties = []> -+ : Intrinsic, -+ ClangBuiltin; -+ -+//===----------------------------------------------------------------------===// -+// LSX -+ -+let TargetPrefix = "loongarch" in { -+ -+foreach inst = ["vadd_b", "vsub_b", -+ "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu", -+ "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu", -+ "vabsd_b", "vabsd_bu", "vadda_b", -+ "vmax_b", "vmax_bu", "vmin_b", "vmin_bu", -+ "vmul_b", "vmuh_b", "vmuh_bu", -+ "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b", -+ "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v", -+ "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b", -+ "vbitclr_b", "vbitset_b", "vbitrev_b", -+ "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu", -+ "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b", -+ "vilvl_b", "vilvh_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_h", "vsub_h", -+ "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu", -+ "vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu", -+ "vabsd_h", "vabsd_hu", "vadda_h", -+ "vmax_h", "vmax_hu", "vmin_h", "vmin_hu", -+ "vmul_h", "vmuh_h", "vmuh_hu", -+ "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h", -+ "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h", -+ "vbitclr_h", "vbitset_h", "vbitrev_h", -+ "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu", -+ "vpackev_h", "vpackod_h", "vpickev_h", "vpickod_h", -+ "vilvl_h", "vilvh_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_w", "vsub_w", -+ "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu", -+ "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu", -+ "vabsd_w", "vabsd_wu", "vadda_w", -+ "vmax_w", "vmax_wu", "vmin_w", "vmin_wu", -+ "vmul_w", "vmuh_w", "vmuh_wu", -+ "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w", -+ "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w", -+ "vbitclr_w", "vbitset_w", "vbitrev_w", -+ "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu", -+ "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w", -+ "vilvl_w", "vilvh_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q", -+ "vsadd_d", "vsadd_du", "vssub_d", "vssub_du", -+ "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du", -+ "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d", -+ "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du", -+ "vaddwev_q_du_d", "vaddwod_q_du_d", -+ "vavg_d", "vavg_du", "vavgr_d", "vavgr_du", -+ "vabsd_d", "vabsd_du", "vadda_d", -+ "vmax_d", "vmax_du", "vmin_d", "vmin_du", -+ "vmul_d", "vmuh_d", "vmuh_du", -+ "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du", -+ "vmulwev_q_du_d", "vmulwod_q_du_d", -+ "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d", -+ "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d", -+ "vbitclr_d", "vbitset_d", "vbitrev_d", -+ "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du", -+ "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d", -+ "vilvl_d", "vilvh_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vaddi_bu", "vsubi_bu", -+ "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", -+ "vsat_b", "vsat_bu", -+ "vandi_b", "vori_b", "vxori_b", "vnori_b", -+ "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", -+ "vsrlri_b", "vsrari_b", -+ "vbitclri_b", "vbitseti_b", "vbitrevi_b", -+ "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", -+ "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_hu", "vsubi_hu", -+ "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", -+ "vsat_h", "vsat_hu", -+ "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", -+ "vsrlri_h", "vsrari_h", -+ "vbitclri_h", "vbitseti_h", "vbitrevi_h", -+ "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", -+ "vreplvei_h", "vshuf4i_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_wu", "vsubi_wu", -+ "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", -+ "vsat_w", "vsat_wu", -+ "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", -+ "vsrlri_w", "vsrari_w", -+ "vbitclri_w", "vbitseti_w", "vbitrevi_w", -+ "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", -+ "vreplvei_w", "vshuf4i_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vaddi_du", "vsubi_du", -+ "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", -+ "vsat_d", "vsat_du", -+ "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", -+ "vsrlri_d", "vsrari_d", -+ "vbitclri_d", "vbitseti_d", "vbitrevi_d", -+ "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", -+ "vreplvei_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", -+ "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", -+ "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", -+ "vaddwev_h_bu_b", "vaddwod_h_bu_b", -+ "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", -+ "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", -+ "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", -+ "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", -+ "vaddwev_w_hu_h", "vaddwod_w_hu_h", -+ "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", -+ "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", -+ "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", -+ "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", -+ "vaddwev_d_wu_w", "vaddwod_d_wu_w", -+ "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", -+ "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", -+ "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", -+ "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", -+ "vssrln_h_w", "vssran_h_w", "vssrln_hu_w", "vssran_hu_w", -+ "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", -+ "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", -+ "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", -+ "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", -+ "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", -+ "vfrstpi_b", "vbitseli_b", "vextrins_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", -+ "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", -+ "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", -+ "vfrstpi_h", "vextrins_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", -+ "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", -+ "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", -+ "vpermi_w", "vextrins_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", -+ "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", -+ "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", -+ "vshuf4i_d", "vextrins_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", -+ "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v8i16_ty], -+ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", -+ "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4i32_ty], -+ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", -+ "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", "vmaddwev_q_du", -+ "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", -+ "vclo_b", "vclz_b", "vpcnt_b"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_w", "vmskltz_w", "vclo_w", "vclz_w", "vpcnt_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", -+ "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vexth_h_b", "vexth_hu_bu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vexth_w_h", "vexth_wu_hu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vexth_d_w", "vexth_du_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_vinsgr2vr_b -+ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_h -+ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_w -+ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lsx_vinsgr2vr_d -+ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_vreplve_b -+ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_h -+ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_w -+ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lsx_vreplve_d -+ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; -+ -+foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v8i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v4i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_i64_ty], -+ [llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+// LSX Float -+ -+foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", -+ "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", -+ "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v4f32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in -+ def int_loongarch_lsx_#inst -+ : VecInt<[llvm_v2f64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", -+ "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", -+ "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", -+ "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", -+ "vftint_l_d", "vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", -+ "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", -+ "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", -+ "vftinth_l_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffint_s_w", "vffint_s_wu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vffint_d_l", "vffint_d_lu"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffintl_d_w", "vffinth_d_w"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vffint_s_l"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty], -+ [IntrNoMem]>; -+foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d", -+ "vftint_w_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcvt_h_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfcvt_s_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s", -+ "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s", -+ "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s", -+ "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s", -+ "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s", -+ "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], -+ [llvm_v4f32_ty, llvm_v4f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d", -+ "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d", -+ "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d", -+ "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d", -+ "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d", -+ "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in -+ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], -+ [llvm_v2f64_ty, llvm_v2f64_ty], -+ [IntrNoMem]>; -+ -+// LSX load/store -+def int_loongarch_lsx_vld -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldx -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], -+ [IntrReadMem, IntrArgMemOnly]>; -+def int_loongarch_lsx_vldrepl_b -+ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_h -+ : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_w -+ : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vldrepl_d -+ : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+ -+def int_loongarch_lsx_vst -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lsx_vstx -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], -+ [IntrWriteMem, IntrArgMemOnly]>; -+def int_loongarch_lsx_vstelm_b -+ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_h -+ : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_w -+ : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lsx_vstelm_d -+ : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+ -+} // TargetPrefix = "loongarch" -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -index ae7167cb5ce7..f55184019988 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -@@ -15,6 +15,7 @@ - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "MCTargetDesc/LoongArchMatInt.h" - #include "llvm/Support/KnownBits.h" -+#include "llvm/Support/raw_ostream.h" - - using namespace llvm; - -@@ -75,7 +76,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { - ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); - return; - } -- // TODO: Add selection nodes needed later. -+ case ISD::BITCAST: { -+ if (VT.is128BitVector() || VT.is512BitVector()) { -+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); -+ CurDAG->RemoveDeadNode(Node); -+ return; -+ } -+ break; -+ } - } - - // Select the default instruction. -@@ -262,6 +270,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { - return false; - } - -+bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, -+ unsigned MinSizeInBits) const { -+ if (!Subtarget->hasExtLSX()) -+ return false; -+ -+ BuildVectorSDNode *Node = dyn_cast(N); -+ -+ if (!Node) -+ return false; -+ -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ -+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, -+ MinSizeInBits, /*IsBigEndian=*/false)) -+ return false; -+ -+ Imm = SplatValue; -+ -+ return true; -+} -+ -+template -+bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) { -+ SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N), -+ Subtarget->getGRLenVT()); -+ return true; -+ } -+ if (!IsSigned && ImmValue.isIntN(ImmBitSize)) { -+ SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N), -+ Subtarget->getGRLenVT()); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, -+ SDValue &SplatImm) const { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ int32_t Log2 = (~ImmValue).exactLogBase2(); -+ -+ if (Log2 != -1) { -+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, -+ SDValue &SplatImm) const { -+ APInt ImmValue; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0); -+ -+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && -+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { -+ int32_t Log2 = ImmValue.exactLogBase2(); -+ -+ if (Log2 != -1) { -+ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ - // This pass converts a legalized DAG into a LoongArch-specific DAG, ready - // for instruction scheduling. - FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -index 3099407aea3e..5e3d6ccc3755 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -@@ -56,6 +56,14 @@ public: - bool selectSExti32(SDValue N, SDValue &Val); - bool selectZExti32(SDValue N, SDValue &Val); - -+ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; -+ -+ template -+ bool selectVSplatImm(SDValue N, SDValue &SplatVal); -+ -+ bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const; -+ bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const; -+ - // Include the pieces autogenerated from the target description. - #include "LoongArchGenDAGISel.inc" - }; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index db5961fc501a..c05133647929 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -62,6 +62,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - MVT::v4i64}) - addRegisterClass(VT, &LoongArch::LASX256RegClass); - -+ static const MVT::SimpleValueType LSXVTs[] = { -+ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -+ -+ if (Subtarget.hasExtLSX()) -+ for (MVT VT : LSXVTs) -+ addRegisterClass(VT, &LoongArch::LSX128RegClass); -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -@@ -109,6 +116,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); - if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget.hasBasicF()) -@@ -138,6 +146,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - } - - static const ISD::CondCode FPCCToExpand[] = { -@@ -194,6 +203,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); - } - -+ if (Subtarget.hasExtLSX()) -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -+ {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); -+ - // Compute derived properties from the register classes. - computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -215,6 +228,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::SRL); -+ if (Subtarget.hasExtLSX()) -+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - } - - bool LoongArchTargetLowering::isOffsetFoldingLegal( -@@ -652,9 +667,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, - return Addr; - } - -+template -+static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, -+ SelectionDAG &DAG, bool IsSigned = false) { -+ auto *CImm = cast(Op->getOperand(ImmOp)); -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Op->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); -+ } -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { -+ SDLoc DL(Op); - switch (Op.getConstantOperandVal(0)) { - default: - return SDValue(); // Don't custom lower most intrinsics. -@@ -662,6 +692,141 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - return DAG.getRegister(LoongArch::R2, PtrVT); - } -+ case Intrinsic::loongarch_lsx_vpickve2gr_d: -+ case Intrinsic::loongarch_lsx_vpickve2gr_du: -+ case Intrinsic::loongarch_lsx_vreplvei_d: -+ return checkIntrinsicImmArg<1>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vreplvei_w: -+ return checkIntrinsicImmArg<2>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsat_b: -+ case Intrinsic::loongarch_lsx_vsat_bu: -+ case Intrinsic::loongarch_lsx_vrotri_b: -+ case Intrinsic::loongarch_lsx_vsllwil_h_b: -+ case Intrinsic::loongarch_lsx_vsllwil_hu_bu: -+ case Intrinsic::loongarch_lsx_vsrlri_b: -+ case Intrinsic::loongarch_lsx_vsrari_b: -+ case Intrinsic::loongarch_lsx_vreplvei_h: -+ return checkIntrinsicImmArg<3>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsat_h: -+ case Intrinsic::loongarch_lsx_vsat_hu: -+ case Intrinsic::loongarch_lsx_vrotri_h: -+ case Intrinsic::loongarch_lsx_vsllwil_w_h: -+ case Intrinsic::loongarch_lsx_vsllwil_wu_hu: -+ case Intrinsic::loongarch_lsx_vsrlri_h: -+ case Intrinsic::loongarch_lsx_vsrari_h: -+ case Intrinsic::loongarch_lsx_vreplvei_b: -+ return checkIntrinsicImmArg<4>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_b_h: -+ case Intrinsic::loongarch_lsx_vsrani_b_h: -+ case Intrinsic::loongarch_lsx_vsrlrni_b_h: -+ case Intrinsic::loongarch_lsx_vsrarni_b_h: -+ case Intrinsic::loongarch_lsx_vssrlni_b_h: -+ case Intrinsic::loongarch_lsx_vssrani_b_h: -+ case Intrinsic::loongarch_lsx_vssrlni_bu_h: -+ case Intrinsic::loongarch_lsx_vssrani_bu_h: -+ case Intrinsic::loongarch_lsx_vssrlrni_b_h: -+ case Intrinsic::loongarch_lsx_vssrarni_b_h: -+ case Intrinsic::loongarch_lsx_vssrlrni_bu_h: -+ case Intrinsic::loongarch_lsx_vssrarni_bu_h: -+ return checkIntrinsicImmArg<4>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsat_w: -+ case Intrinsic::loongarch_lsx_vsat_wu: -+ case Intrinsic::loongarch_lsx_vrotri_w: -+ case Intrinsic::loongarch_lsx_vsllwil_d_w: -+ case Intrinsic::loongarch_lsx_vsllwil_du_wu: -+ case Intrinsic::loongarch_lsx_vsrlri_w: -+ case Intrinsic::loongarch_lsx_vsrari_w: -+ case Intrinsic::loongarch_lsx_vslei_bu: -+ case Intrinsic::loongarch_lsx_vslei_hu: -+ case Intrinsic::loongarch_lsx_vslei_wu: -+ case Intrinsic::loongarch_lsx_vslei_du: -+ case Intrinsic::loongarch_lsx_vslti_bu: -+ case Intrinsic::loongarch_lsx_vslti_hu: -+ case Intrinsic::loongarch_lsx_vslti_wu: -+ case Intrinsic::loongarch_lsx_vslti_du: -+ case Intrinsic::loongarch_lsx_vbsll_v: -+ case Intrinsic::loongarch_lsx_vbsrl_v: -+ return checkIntrinsicImmArg<5>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vseqi_b: -+ case Intrinsic::loongarch_lsx_vseqi_h: -+ case Intrinsic::loongarch_lsx_vseqi_w: -+ case Intrinsic::loongarch_lsx_vseqi_d: -+ case Intrinsic::loongarch_lsx_vslei_b: -+ case Intrinsic::loongarch_lsx_vslei_h: -+ case Intrinsic::loongarch_lsx_vslei_w: -+ case Intrinsic::loongarch_lsx_vslei_d: -+ case Intrinsic::loongarch_lsx_vslti_b: -+ case Intrinsic::loongarch_lsx_vslti_h: -+ case Intrinsic::loongarch_lsx_vslti_w: -+ case Intrinsic::loongarch_lsx_vslti_d: -+ return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); -+ case Intrinsic::loongarch_lsx_vsrlni_h_w: -+ case Intrinsic::loongarch_lsx_vsrani_h_w: -+ case Intrinsic::loongarch_lsx_vsrlrni_h_w: -+ case Intrinsic::loongarch_lsx_vsrarni_h_w: -+ case Intrinsic::loongarch_lsx_vssrlni_h_w: -+ case Intrinsic::loongarch_lsx_vssrani_h_w: -+ case Intrinsic::loongarch_lsx_vssrlni_hu_w: -+ case Intrinsic::loongarch_lsx_vssrani_hu_w: -+ case Intrinsic::loongarch_lsx_vssrlrni_h_w: -+ case Intrinsic::loongarch_lsx_vssrarni_h_w: -+ case Intrinsic::loongarch_lsx_vssrlrni_hu_w: -+ case Intrinsic::loongarch_lsx_vssrarni_hu_w: -+ case Intrinsic::loongarch_lsx_vfrstpi_b: -+ case Intrinsic::loongarch_lsx_vfrstpi_h: -+ return checkIntrinsicImmArg<5>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsat_d: -+ case Intrinsic::loongarch_lsx_vsat_du: -+ case Intrinsic::loongarch_lsx_vrotri_d: -+ case Intrinsic::loongarch_lsx_vsrlri_d: -+ case Intrinsic::loongarch_lsx_vsrari_d: -+ return checkIntrinsicImmArg<6>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_w_d: -+ case Intrinsic::loongarch_lsx_vsrani_w_d: -+ case Intrinsic::loongarch_lsx_vsrlrni_w_d: -+ case Intrinsic::loongarch_lsx_vsrarni_w_d: -+ case Intrinsic::loongarch_lsx_vssrlni_w_d: -+ case Intrinsic::loongarch_lsx_vssrani_w_d: -+ case Intrinsic::loongarch_lsx_vssrlni_wu_d: -+ case Intrinsic::loongarch_lsx_vssrani_wu_d: -+ case Intrinsic::loongarch_lsx_vssrlrni_w_d: -+ case Intrinsic::loongarch_lsx_vssrarni_w_d: -+ case Intrinsic::loongarch_lsx_vssrlrni_wu_d: -+ case Intrinsic::loongarch_lsx_vssrarni_wu_d: -+ return checkIntrinsicImmArg<6>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vsrlni_d_q: -+ case Intrinsic::loongarch_lsx_vsrani_d_q: -+ case Intrinsic::loongarch_lsx_vsrlrni_d_q: -+ case Intrinsic::loongarch_lsx_vsrarni_d_q: -+ case Intrinsic::loongarch_lsx_vssrlni_d_q: -+ case Intrinsic::loongarch_lsx_vssrani_d_q: -+ case Intrinsic::loongarch_lsx_vssrlni_du_q: -+ case Intrinsic::loongarch_lsx_vssrani_du_q: -+ case Intrinsic::loongarch_lsx_vssrlrni_d_q: -+ case Intrinsic::loongarch_lsx_vssrarni_d_q: -+ case Intrinsic::loongarch_lsx_vssrlrni_du_q: -+ case Intrinsic::loongarch_lsx_vssrarni_du_q: -+ return checkIntrinsicImmArg<7>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vnori_b: -+ case Intrinsic::loongarch_lsx_vshuf4i_b: -+ case Intrinsic::loongarch_lsx_vshuf4i_h: -+ case Intrinsic::loongarch_lsx_vshuf4i_w: -+ return checkIntrinsicImmArg<8>(Op, 2, DAG); -+ case Intrinsic::loongarch_lsx_vshuf4i_d: -+ case Intrinsic::loongarch_lsx_vpermi_w: -+ case Intrinsic::loongarch_lsx_vbitseli_b: -+ case Intrinsic::loongarch_lsx_vextrins_b: -+ case Intrinsic::loongarch_lsx_vextrins_h: -+ case Intrinsic::loongarch_lsx_vextrins_w: -+ case Intrinsic::loongarch_lsx_vextrins_d: -+ return checkIntrinsicImmArg<8>(Op, 3, DAG); -+ case Intrinsic::loongarch_lsx_vrepli_b: -+ case Intrinsic::loongarch_lsx_vrepli_h: -+ case Intrinsic::loongarch_lsx_vrepli_w: -+ case Intrinsic::loongarch_lsx_vrepli_d: -+ return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); -+ case Intrinsic::loongarch_lsx_vldi: -+ return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); - } - } - -@@ -757,6 +922,29 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, - {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - } -+ case Intrinsic::loongarch_lsx_vld: -+ case Intrinsic::loongarch_lsx_vldrepl_b: -+ return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_h: -+ return !isShiftedInt<11, 1>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_w: -+ return !isShiftedInt<10, 2>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vldrepl_d: -+ return !isShiftedInt<9, 3>( -+ cast(Op.getOperand(3))->getSExtValue()) -+ ? emitIntrinsicWithChainErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - } - } - -@@ -875,6 +1063,36 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : Op; - } -+ case Intrinsic::loongarch_lsx_vst: -+ return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_b: -+ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_h: -+ return (!isShiftedInt<8, 1>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_w: -+ return (!isShiftedInt<8, 2>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); -+ case Intrinsic::loongarch_lsx_vstelm_d: -+ return (!isShiftedInt<8, 3>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<1>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - } - } - -@@ -1026,16 +1244,110 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, - return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); - } - --// Helper function that emits error message for intrinsics with chain and return --// a UNDEF and the chain as the results. --static void emitErrorAndReplaceIntrinsicWithChainResults( -+// Helper function that emits error message for intrinsics with/without chain -+// and return a UNDEF or and the chain as the results. -+static void emitErrorAndReplaceIntrinsicResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, -- StringRef ErrorMsg) { -+ StringRef ErrorMsg, bool WithChain = true) { - DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); - Results.push_back(DAG.getUNDEF(N->getValueType(0))); -+ if (!WithChain) -+ return; - Results.push_back(N->getOperand(0)); - } - -+template -+static void -+replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl &Results, -+ SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, -+ unsigned ResOp) { -+ const StringRef ErrorMsgOOR = "argument out of range"; -+ unsigned Imm = cast(Node->getOperand(2))->getZExtValue(); -+ if (!isUInt(Imm)) { -+ emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, -+ /*WithChain=*/false); -+ return; -+ } -+ SDLoc DL(Node); -+ SDValue Vec = Node->getOperand(1); -+ -+ SDValue PickElt = -+ DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, -+ DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), -+ DAG.getValueType(Vec.getValueType().getVectorElementType())); -+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), -+ PickElt.getValue(0))); -+} -+ -+static void replaceVecCondBranchResults(SDNode *N, -+ SmallVectorImpl &Results, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget, -+ unsigned ResOp) { -+ SDLoc DL(N); -+ SDValue Vec = N->getOperand(1); -+ -+ SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); -+ Results.push_back( -+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); -+} -+ -+static void -+replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget) { -+ switch (N->getConstantOperandVal(0)) { -+ default: -+ llvm_unreachable("Unexpected Intrinsic."); -+ case Intrinsic::loongarch_lsx_vpickve2gr_b: -+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_h: -+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_w: -+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_SEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_bu: -+ replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_hu: -+ replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_vpickve2gr_wu: -+ replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, -+ LoongArchISD::VPICK_ZEXT_ELT); -+ break; -+ case Intrinsic::loongarch_lsx_bz_b: -+ case Intrinsic::loongarch_lsx_bz_h: -+ case Intrinsic::loongarch_lsx_bz_w: -+ case Intrinsic::loongarch_lsx_bz_d: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VALL_ZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bz_v: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VANY_ZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bnz_b: -+ case Intrinsic::loongarch_lsx_bnz_h: -+ case Intrinsic::loongarch_lsx_bnz_w: -+ case Intrinsic::loongarch_lsx_bnz_d: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VALL_NONZERO); -+ break; -+ case Intrinsic::loongarch_lsx_bnz_v: -+ replaceVecCondBranchResults(N, Results, DAG, Subtarget, -+ LoongArchISD::VANY_NONZERO); -+ break; -+ } -+} -+ - void LoongArchTargetLowering::ReplaceNodeResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - SDLoc DL(N); -@@ -1168,14 +1480,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( - llvm_unreachable("Unexpected Intrinsic."); - case Intrinsic::loongarch_movfcsr2gr: { - if (!Subtarget.hasBasicF()) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgReqF); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); - return; - } - unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<2>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue MOVFCSR2GRResults = DAG.getNode( -@@ -1211,7 +1521,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - {Chain, Op2, \ - DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ -- Results.push_back(NODE.getValue(1)); \ -+ Results.push_back(NODE.getValue(1)); \ - break; \ - } - CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) -@@ -1220,8 +1530,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - #define CSR_CASE(ID) \ - case Intrinsic::loongarch_##ID: { \ - if (!Subtarget.is64Bit()) \ -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ -- ErrorMsgReqLA64); \ -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ - break; \ - } - CSR_CASE(csrrd_d); -@@ -1232,8 +1541,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrrd_w: { - unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRRDResults = -@@ -1247,8 +1555,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrwr_w: { - unsigned Imm = cast(N->getOperand(3))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRWRResults = -@@ -1263,8 +1570,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrxchg_w: { - unsigned Imm = cast(N->getOperand(4))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgOOR); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); - return; - } - SDValue CSRXCHGResults = DAG.getNode( -@@ -1302,8 +1608,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( - } - case Intrinsic::loongarch_lddir_d: { - if (!Subtarget.is64Bit()) { -- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -- ErrorMsgReqLA64); -+ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); - return; - } - break; -@@ -1322,6 +1627,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( - Results.push_back(N->getOperand(0)); - break; - } -+ case ISD::INTRINSIC_WO_CHAIN: { -+ replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); -+ break; -+ } - } - } - -@@ -1685,6 +1994,440 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, - Src.getOperand(0)); - } - -+template -+static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, -+ SelectionDAG &DAG, -+ const LoongArchSubtarget &Subtarget, -+ bool IsSigned = false) { -+ SDLoc DL(Node); -+ auto *CImm = cast(Node->getOperand(ImmOp)); -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); -+ } -+ return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); -+} -+ -+template -+static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, -+ SelectionDAG &DAG, bool IsSigned = false) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(ImmOp)); -+ -+ // Check the ImmArg. -+ if ((IsSigned && !isInt(CImm->getSExtValue())) || -+ (!IsSigned && !isUInt(CImm->getZExtValue()))) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ return DAG.getConstant( -+ APInt(ResTy.getScalarType().getSizeInBits(), -+ IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), -+ DL, ResTy); -+} -+ -+static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ SDValue Vec = Node->getOperand(2); -+ SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); -+ return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); -+} -+ -+static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, ResTy); -+ SDValue Bit = -+ DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); -+ -+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), -+ DAG.getNOT(DL, Bit, ResTy)); -+} -+ -+template -+static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. -+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); -+ -+ return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); -+} -+ -+template -+static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. -+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); -+ return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); -+} -+ -+template -+static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { -+ SDLoc DL(Node); -+ EVT ResTy = Node->getValueType(0); -+ auto *CImm = cast(Node->getOperand(2)); -+ // Check the unsigned ImmArg. -+ if (!isUInt(CImm->getZExtValue())) { -+ DAG.getContext()->emitError(Node->getOperationName(0) + -+ ": argument out of range."); -+ return DAG.getNode(ISD::UNDEF, DL, ResTy); -+ } -+ -+ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); -+ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); -+ return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); -+} -+ -+static SDValue -+performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, -+ TargetLowering::DAGCombinerInfo &DCI, -+ const LoongArchSubtarget &Subtarget) { -+ SDLoc DL(N); -+ switch (N->getConstantOperandVal(0)) { -+ default: -+ break; -+ case Intrinsic::loongarch_lsx_vadd_b: -+ case Intrinsic::loongarch_lsx_vadd_h: -+ case Intrinsic::loongarch_lsx_vadd_w: -+ case Intrinsic::loongarch_lsx_vadd_d: -+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vaddi_bu: -+ case Intrinsic::loongarch_lsx_vaddi_hu: -+ case Intrinsic::loongarch_lsx_vaddi_wu: -+ case Intrinsic::loongarch_lsx_vaddi_du: -+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsub_b: -+ case Intrinsic::loongarch_lsx_vsub_h: -+ case Intrinsic::loongarch_lsx_vsub_w: -+ case Intrinsic::loongarch_lsx_vsub_d: -+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vsubi_bu: -+ case Intrinsic::loongarch_lsx_vsubi_hu: -+ case Intrinsic::loongarch_lsx_vsubi_wu: -+ case Intrinsic::loongarch_lsx_vsubi_du: -+ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vneg_b: -+ case Intrinsic::loongarch_lsx_vneg_h: -+ case Intrinsic::loongarch_lsx_vneg_w: -+ case Intrinsic::loongarch_lsx_vneg_d: -+ return DAG.getNode( -+ ISD::SUB, DL, N->getValueType(0), -+ DAG.getConstant( -+ APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, -+ /*isSigned=*/true), -+ SDLoc(N), N->getValueType(0)), -+ N->getOperand(1)); -+ case Intrinsic::loongarch_lsx_vmax_b: -+ case Intrinsic::loongarch_lsx_vmax_h: -+ case Intrinsic::loongarch_lsx_vmax_w: -+ case Intrinsic::loongarch_lsx_vmax_d: -+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmax_bu: -+ case Intrinsic::loongarch_lsx_vmax_hu: -+ case Intrinsic::loongarch_lsx_vmax_wu: -+ case Intrinsic::loongarch_lsx_vmax_du: -+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmaxi_b: -+ case Intrinsic::loongarch_lsx_vmaxi_h: -+ case Intrinsic::loongarch_lsx_vmaxi_w: -+ case Intrinsic::loongarch_lsx_vmaxi_d: -+ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); -+ case Intrinsic::loongarch_lsx_vmaxi_bu: -+ case Intrinsic::loongarch_lsx_vmaxi_hu: -+ case Intrinsic::loongarch_lsx_vmaxi_wu: -+ case Intrinsic::loongarch_lsx_vmaxi_du: -+ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vmin_b: -+ case Intrinsic::loongarch_lsx_vmin_h: -+ case Intrinsic::loongarch_lsx_vmin_w: -+ case Intrinsic::loongarch_lsx_vmin_d: -+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmin_bu: -+ case Intrinsic::loongarch_lsx_vmin_hu: -+ case Intrinsic::loongarch_lsx_vmin_wu: -+ case Intrinsic::loongarch_lsx_vmin_du: -+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmini_b: -+ case Intrinsic::loongarch_lsx_vmini_h: -+ case Intrinsic::loongarch_lsx_vmini_w: -+ case Intrinsic::loongarch_lsx_vmini_d: -+ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); -+ case Intrinsic::loongarch_lsx_vmini_bu: -+ case Intrinsic::loongarch_lsx_vmini_hu: -+ case Intrinsic::loongarch_lsx_vmini_wu: -+ case Intrinsic::loongarch_lsx_vmini_du: -+ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vmul_b: -+ case Intrinsic::loongarch_lsx_vmul_h: -+ case Intrinsic::loongarch_lsx_vmul_w: -+ case Intrinsic::loongarch_lsx_vmul_d: -+ return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmadd_b: -+ case Intrinsic::loongarch_lsx_vmadd_h: -+ case Intrinsic::loongarch_lsx_vmadd_w: -+ case Intrinsic::loongarch_lsx_vmadd_d: { -+ EVT ResTy = N->getValueType(0); -+ return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), -+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -+ N->getOperand(3))); -+ } -+ case Intrinsic::loongarch_lsx_vmsub_b: -+ case Intrinsic::loongarch_lsx_vmsub_h: -+ case Intrinsic::loongarch_lsx_vmsub_w: -+ case Intrinsic::loongarch_lsx_vmsub_d: { -+ EVT ResTy = N->getValueType(0); -+ return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), -+ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -+ N->getOperand(3))); -+ } -+ case Intrinsic::loongarch_lsx_vdiv_b: -+ case Intrinsic::loongarch_lsx_vdiv_h: -+ case Intrinsic::loongarch_lsx_vdiv_w: -+ case Intrinsic::loongarch_lsx_vdiv_d: -+ return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vdiv_bu: -+ case Intrinsic::loongarch_lsx_vdiv_hu: -+ case Intrinsic::loongarch_lsx_vdiv_wu: -+ case Intrinsic::loongarch_lsx_vdiv_du: -+ return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmod_b: -+ case Intrinsic::loongarch_lsx_vmod_h: -+ case Intrinsic::loongarch_lsx_vmod_w: -+ case Intrinsic::loongarch_lsx_vmod_d: -+ return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vmod_bu: -+ case Intrinsic::loongarch_lsx_vmod_hu: -+ case Intrinsic::loongarch_lsx_vmod_wu: -+ case Intrinsic::loongarch_lsx_vmod_du: -+ return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vand_v: -+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vor_v: -+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vxor_v: -+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vnor_v: { -+ SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ return DAG.getNOT(DL, Res, Res->getValueType(0)); -+ } -+ case Intrinsic::loongarch_lsx_vandi_b: -+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vori_b: -+ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vxori_b: -+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<8>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsll_b: -+ case Intrinsic::loongarch_lsx_vsll_h: -+ case Intrinsic::loongarch_lsx_vsll_w: -+ case Intrinsic::loongarch_lsx_vsll_d: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_b: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_h: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_w: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vslli_d: -+ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrl_b: -+ case Intrinsic::loongarch_lsx_vsrl_h: -+ case Intrinsic::loongarch_lsx_vsrl_w: -+ case Intrinsic::loongarch_lsx_vsrl_d: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_b: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_h: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_w: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrli_d: -+ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsra_b: -+ case Intrinsic::loongarch_lsx_vsra_h: -+ case Intrinsic::loongarch_lsx_vsra_w: -+ case Intrinsic::loongarch_lsx_vsra_d: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ truncateVecElts(N, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_b: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<3>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_h: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<4>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_w: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<5>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vsrai_d: -+ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), -+ lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vpcnt_b: -+ case Intrinsic::loongarch_lsx_vpcnt_h: -+ case Intrinsic::loongarch_lsx_vpcnt_w: -+ case Intrinsic::loongarch_lsx_vpcnt_d: -+ return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); -+ case Intrinsic::loongarch_lsx_vbitclr_b: -+ case Intrinsic::loongarch_lsx_vbitclr_h: -+ case Intrinsic::loongarch_lsx_vbitclr_w: -+ case Intrinsic::loongarch_lsx_vbitclr_d: -+ return lowerVectorBitClear(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_b: -+ return lowerVectorBitClearImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_h: -+ return lowerVectorBitClearImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_w: -+ return lowerVectorBitClearImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitclri_d: -+ return lowerVectorBitClearImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitset_b: -+ case Intrinsic::loongarch_lsx_vbitset_h: -+ case Intrinsic::loongarch_lsx_vbitset_w: -+ case Intrinsic::loongarch_lsx_vbitset_d: { -+ EVT VecTy = N->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, VecTy); -+ return DAG.getNode( -+ ISD::OR, DL, VecTy, N->getOperand(1), -+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); -+ } -+ case Intrinsic::loongarch_lsx_vbitseti_b: -+ return lowerVectorBitSetImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_h: -+ return lowerVectorBitSetImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_w: -+ return lowerVectorBitSetImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitseti_d: -+ return lowerVectorBitSetImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrev_b: -+ case Intrinsic::loongarch_lsx_vbitrev_h: -+ case Intrinsic::loongarch_lsx_vbitrev_w: -+ case Intrinsic::loongarch_lsx_vbitrev_d: { -+ EVT VecTy = N->getValueType(0); -+ SDValue One = DAG.getConstant(1, DL, VecTy); -+ return DAG.getNode( -+ ISD::XOR, DL, VecTy, N->getOperand(1), -+ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); -+ } -+ case Intrinsic::loongarch_lsx_vbitrevi_b: -+ return lowerVectorBitRevImm<3>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_h: -+ return lowerVectorBitRevImm<4>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_w: -+ return lowerVectorBitRevImm<5>(N, DAG); -+ case Intrinsic::loongarch_lsx_vbitrevi_d: -+ return lowerVectorBitRevImm<6>(N, DAG); -+ case Intrinsic::loongarch_lsx_vfadd_s: -+ case Intrinsic::loongarch_lsx_vfadd_d: -+ return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfsub_s: -+ case Intrinsic::loongarch_lsx_vfsub_d: -+ return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfmul_s: -+ case Intrinsic::loongarch_lsx_vfmul_d: -+ return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfdiv_s: -+ case Intrinsic::loongarch_lsx_vfdiv_d: -+ return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2)); -+ case Intrinsic::loongarch_lsx_vfmadd_s: -+ case Intrinsic::loongarch_lsx_vfmadd_d: -+ return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), -+ N->getOperand(2), N->getOperand(3)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_b: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_h: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_w: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vinsgr2vr_d: -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), -+ N->getOperand(1), N->getOperand(2), -+ legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); -+ case Intrinsic::loongarch_lsx_vreplgr2vr_b: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_h: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_w: -+ case Intrinsic::loongarch_lsx_vreplgr2vr_d: { -+ EVT ResTy = N->getValueType(0); -+ SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); -+ return DAG.getBuildVector(ResTy, DL, Ops); -+ } -+ case Intrinsic::loongarch_lsx_vreplve_b: -+ case Intrinsic::loongarch_lsx_vreplve_h: -+ case Intrinsic::loongarch_lsx_vreplve_w: -+ case Intrinsic::loongarch_lsx_vreplve_d: -+ return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), -+ N->getOperand(1), -+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), -+ N->getOperand(2))); -+ } -+ return SDValue(); -+} -+ - SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; -@@ -1699,6 +2442,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, - return performSRLCombine(N, DAG, DCI, Subtarget); - case LoongArchISD::BITREV_W: - return performBITREV_WCombine(N, DAG, DCI, Subtarget); -+ case ISD::INTRINSIC_WO_CHAIN: -+ return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); - } - return SDValue(); - } -@@ -1752,6 +2497,101 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, - return SinkMBB; - } - -+static MachineBasicBlock * -+emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, -+ const LoongArchSubtarget &Subtarget) { -+ unsigned CondOpc; -+ switch (MI.getOpcode()) { -+ default: -+ llvm_unreachable("Unexpected opcode"); -+ case LoongArch::PseudoVBZ: -+ CondOpc = LoongArch::VSETEQZ_V; -+ break; -+ case LoongArch::PseudoVBZ_B: -+ CondOpc = LoongArch::VSETANYEQZ_B; -+ break; -+ case LoongArch::PseudoVBZ_H: -+ CondOpc = LoongArch::VSETANYEQZ_H; -+ break; -+ case LoongArch::PseudoVBZ_W: -+ CondOpc = LoongArch::VSETANYEQZ_W; -+ break; -+ case LoongArch::PseudoVBZ_D: -+ CondOpc = LoongArch::VSETANYEQZ_D; -+ break; -+ case LoongArch::PseudoVBNZ: -+ CondOpc = LoongArch::VSETNEZ_V; -+ break; -+ case LoongArch::PseudoVBNZ_B: -+ CondOpc = LoongArch::VSETALLNEZ_B; -+ break; -+ case LoongArch::PseudoVBNZ_H: -+ CondOpc = LoongArch::VSETALLNEZ_H; -+ break; -+ case LoongArch::PseudoVBNZ_W: -+ CondOpc = LoongArch::VSETALLNEZ_W; -+ break; -+ case LoongArch::PseudoVBNZ_D: -+ CondOpc = LoongArch::VSETALLNEZ_D; -+ break; -+ } -+ -+ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -+ const BasicBlock *LLVM_BB = BB->getBasicBlock(); -+ DebugLoc DL = MI.getDebugLoc(); -+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); -+ MachineFunction::iterator It = ++BB->getIterator(); -+ -+ MachineFunction *F = BB->getParent(); -+ MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); -+ MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); -+ MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); -+ -+ F->insert(It, FalseBB); -+ F->insert(It, TrueBB); -+ F->insert(It, SinkBB); -+ -+ // Transfer the remainder of MBB and its successor edges to Sink. -+ SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); -+ SinkBB->transferSuccessorsAndUpdatePHIs(BB); -+ -+ // Insert the real instruction to BB. -+ Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); -+ BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); -+ -+ // Insert branch. -+ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); -+ BB->addSuccessor(FalseBB); -+ BB->addSuccessor(TrueBB); -+ -+ // FalseBB. -+ Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); -+ BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) -+ .addReg(LoongArch::R0) -+ .addImm(0); -+ BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); -+ FalseBB->addSuccessor(SinkBB); -+ -+ // TrueBB. -+ Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); -+ BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) -+ .addReg(LoongArch::R0) -+ .addImm(1); -+ TrueBB->addSuccessor(SinkBB); -+ -+ // SinkBB: merge the results. -+ BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), -+ MI.getOperand(0).getReg()) -+ .addReg(RD1) -+ .addMBB(FalseBB) -+ .addReg(RD2) -+ .addMBB(TrueBB); -+ -+ // The pseudo instruction is gone now. -+ MI.eraseFromParent(); -+ return SinkBB; -+} -+ - MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -1786,6 +2626,17 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MI.eraseFromParent(); - return BB; - } -+ case LoongArch::PseudoVBZ: -+ case LoongArch::PseudoVBZ_B: -+ case LoongArch::PseudoVBZ_H: -+ case LoongArch::PseudoVBZ_W: -+ case LoongArch::PseudoVBZ_D: -+ case LoongArch::PseudoVBNZ: -+ case LoongArch::PseudoVBNZ_B: -+ case LoongArch::PseudoVBNZ_H: -+ case LoongArch::PseudoVBNZ_W: -+ case LoongArch::PseudoVBNZ_D: -+ return emitVecCondBranchPseudo(MI, BB, Subtarget); - } - } - -@@ -1858,6 +2709,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - NODE_NAME_CASE(MOVFCSR2GR) - NODE_NAME_CASE(CACOP_D) - NODE_NAME_CASE(CACOP_W) -+ NODE_NAME_CASE(VPICK_SEXT_ELT) -+ NODE_NAME_CASE(VPICK_ZEXT_ELT) -+ NODE_NAME_CASE(VREPLVE) -+ NODE_NAME_CASE(VALL_ZERO) -+ NODE_NAME_CASE(VANY_ZERO) -+ NODE_NAME_CASE(VALL_NONZERO) -+ NODE_NAME_CASE(VANY_NONZERO) - } - #undef NODE_NAME_CASE - return nullptr; -@@ -1884,6 +2742,10 @@ const MCPhysReg ArgFPR64s[] = { - LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, - LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; - -+const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, -+ LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, -+ LoongArch::VR6, LoongArch::VR7}; -+ - // Pass a 2*GRLen argument that has been split into two GRLen values through - // registers or the stack as necessary. - static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, -@@ -2030,6 +2892,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - Reg = State.AllocateReg(ArgFPR32s); - else if (ValVT == MVT::f64 && !UseGPRForFloat) - Reg = State.AllocateReg(ArgFPR64s); -+ else if (ValVT.is128BitVector()) -+ Reg = State.AllocateReg(ArgVRs); - else - Reg = State.AllocateReg(ArgGPRs); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 500407493fe5..7765057ebffb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -110,6 +110,20 @@ enum NodeType : unsigned { - - // Read CPU configuration information operation - CPUCFG, -+ -+ // Vector Shuffle -+ VREPLVE, -+ -+ // Extended vector element extraction -+ VPICK_SEXT_ELT, -+ VPICK_ZEXT_ELT, -+ -+ // Vector comparisons -+ VALL_ZERO, -+ VANY_ZERO, -+ VALL_NONZERO, -+ VANY_NONZERO, -+ - // Intrinsic operations end ============================================= - }; - } // end namespace LoongArchISD -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index ef79b8a0dcd3..a5d66ebac96a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -47,6 +47,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - return; - } - -+ // VR->VR copies. -+ if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) { -+ BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)) -+ .addImm(0); -+ return; -+ } -+ - // GPR->CFR copy. - if (LoongArch::CFRRegClass.contains(DstReg) && - LoongArch::GPRRegClass.contains(SrcReg)) { -@@ -99,6 +107,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( - Opcode = LoongArch::FST_S; - else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::FST_D; -+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::VST; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoST_CFR; - else -@@ -133,6 +143,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - Opcode = LoongArch::FLD_S; - else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::FLD_D; -+ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::VLD; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoLD_CFR; - else -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index ac391ef471b1..b2c4bb812ba5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -182,7 +182,7 @@ def imm32 : Operand { - let ParserMatchClass = ImmAsmOperand<"", 32, "">; - } - --def uimm1 : Operand { -+def uimm1 : Operand, ImmLeaf(Imm);}]>{ - let ParserMatchClass = UImmAsmOperand<1>; - } - -@@ -197,11 +197,11 @@ def uimm2_plus1 : Operand, - let DecoderMethod = "decodeUImmOperand<2, 1>"; - } - --def uimm3 : Operand { -+def uimm3 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<3>; - } - --def uimm4 : Operand { -+def uimm4 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<4>; - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index a8ed285a37cf..13332be0bc38 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -10,6 +10,146 @@ - // - //===----------------------------------------------------------------------===// - -+def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, -+ SDTCisInt<1>, SDTCisVec<1>, -+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; -+def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; -+ -+// Target nodes. -+def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; -+def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO", -+ SDT_LoongArchVecCond>; -+def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO", -+ SDT_LoongArchVecCond>; -+ -+def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", -+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; -+def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", -+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; -+ -+class VecCond -+ : Pseudo<(outs GPR:$rd), (ins RC:$vj), -+ [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> { -+ let hasSideEffects = 0; -+ let mayLoad = 0; -+ let mayStore = 0; -+ let usesCustomInserter = 1; -+} -+ -+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector), -+ (bitconvert (v4i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -+}]>; -+ -+def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7; -+}]>; -+def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15; -+}]>; -+def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; -+}]>; -+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), -+ (bitconvert (v4i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ -+ if (N->getOpcode() == ISD::BITCAST) -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; -+}]>; -+ -+def vsplati8imm7 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati8_imm_eq_7)>; -+def vsplati16imm15 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati16_imm_eq_15)>; -+def vsplati32imm31 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati32_imm_eq_31)>; -+def vsplati64imm63 : PatFrag<(ops node:$reg), -+ (and node:$reg, vsplati64_imm_eq_63)>; -+ -+foreach N = [3, 4, 5, 6, 8] in -+ def SplatPat_uimm#N : ComplexPattern", -+ [build_vector, bitconvert], [], 2>; -+ -+foreach N = [5] in -+ def SplatPat_simm#N : ComplexPattern", -+ [build_vector, bitconvert]>; -+ -+def vsplat_uimm_inv_pow2 : ComplexPattern; -+ -+def vsplat_uimm_pow2 : ComplexPattern; -+ -+def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), -+ (add node:$vd, (mul node:$vj, node:$vk))>; -+ -+def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), -+ (sub node:$vd, (mul node:$vj, node:$vk))>; -+ -+def lsxsplati8 : PatFrag<(ops node:$e0), -+ (v16i8 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+def lsxsplati16 : PatFrag<(ops node:$e0), -+ (v8i16 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+def lsxsplati32 : PatFrag<(ops node:$e0), -+ (v4i32 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+ -+def lsxsplati64 : PatFrag<(ops node:$e0), -+ (v2i64 (build_vector node:$e0, node:$e0))>; -+ -+def to_valide_timm : SDNodeXForm(N); -+ return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); -+}]>; -+ - //===----------------------------------------------------------------------===// - // Instruction class templates - //===----------------------------------------------------------------------===// -@@ -1004,4 +1144,680 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], - "vrepli.d", "$vd, $imm">; - } - -+def PseudoVBNZ_B : VecCond; -+def PseudoVBNZ_H : VecCond; -+def PseudoVBNZ_W : VecCond; -+def PseudoVBNZ_D : VecCond; -+def PseudoVBNZ : VecCond; -+ -+def PseudoVBZ_B : VecCond; -+def PseudoVBZ_H : VecCond; -+def PseudoVBZ_W : VecCond; -+def PseudoVBZ_D : VecCond; -+def PseudoVBZ : VecCond; -+ -+} // Predicates = [HasExtLSX] -+ -+multiclass PatVr { -+ def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))), -+ (!cast(Inst#"_B") LSX128:$vj)>; -+ def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))), -+ (!cast(Inst#"_H") LSX128:$vj)>; -+ def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))), -+ (!cast(Inst#"_W") LSX128:$vj)>; -+ def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))), -+ (!cast(Inst#"_D") LSX128:$vj)>; -+} -+ -+multiclass PatVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrVrF { -+ def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), -+ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrVrU { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatVrSimm5 { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; -+} -+ -+multiclass PatVrUimm5 { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; -+} -+ -+multiclass PatVrVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatShiftVrVr { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, -+ (v16i8 LSX128:$vk))), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15, -+ (v8i16 LSX128:$vk))), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31, -+ (v4i32 LSX128:$vk))), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63, -+ (v2i64 LSX128:$vk))), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatShiftVrUimm { -+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))), -+ (!cast(Inst#"_B") LSX128:$vj, uimm3:$imm)>; -+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))), -+ (!cast(Inst#"_H") LSX128:$vj, uimm4:$imm)>; -+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_W") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))), -+ (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; -+} -+ -+class PatVrVrB -+ : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (Inst LSX128:$vj, LSX128:$vk)>; -+ -+let Predicates = [HasExtLSX] in { -+ -+// VADD_{B/H/W/D} -+defm : PatVrVr; -+// VSUB_{B/H/W/D} -+defm : PatVrVr; -+ -+// VADDI_{B/H/W/D}U -+defm : PatVrUimm5; -+// VSUBI_{B/H/W/D}U -+defm : PatVrUimm5; -+ -+// VNEG_{B/H/W/D} -+def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>; -+def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>; -+ -+// VMAX[I]_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+defm : PatVrSimm5; -+defm : PatVrUimm5; -+ -+// VMIN[I]_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+defm : PatVrSimm5; -+defm : PatVrUimm5; -+ -+// VMUL_{B/H/W/D} -+defm : PatVrVr; -+ -+// VMADD_{B/H/W/D} -+defm : PatVrVrVr; -+// VMSUB_{B/H/W/D} -+defm : PatVrVrVr; -+ -+// VDIV_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ -+// VMOD_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ -+// VAND_V -+def : PatVrVrB; -+// VNOR_V -+def : PatVrVrB; -+// VXOR_V -+def : PatVrVrB; -+// VNOR_V -+def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), -+ (VNOR_V LSX128:$vj, LSX128:$vk)>; -+ -+// VANDI_B -+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VANDI_B LSX128:$vj, uimm8:$imm)>; -+// VORI_B -+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VORI_B LSX128:$vj, uimm8:$imm)>; -+ -+// VXORI_B -+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), -+ (VXORI_B LSX128:$vj, uimm8:$imm)>; -+ -+// VSLL[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VSRL[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VSRA[I]_{B/H/W/D} -+defm : PatVrVr; -+defm : PatShiftVrVr; -+defm : PatShiftVrUimm; -+ -+// VPCNT_{B/H/W/D} -+defm : PatVr; -+ -+// VBITCLR_{B/H/W/D} -+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), -+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), -+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), -+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), -+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati8imm7 v16i8:$vk)))), -+ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati16imm15 v8i16:$vk)))), -+ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati32imm31 v4i32:$vk)))), -+ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati64imm63 v2i64:$vk)))), -+ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITCLRI_{B/H/W/D} -+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), -+ (VBITCLRI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), -+ (VBITCLRI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), -+ (VBITCLRI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), -+ (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VBITSET_{B/H/W/D} -+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), -+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), -+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), -+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), -+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), -+ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), -+ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), -+ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), -+ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITSETI_{B/H/W/D} -+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (VBITSETI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (VBITSETI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (VBITSETI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (VBITSETI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VBITREV_{B/H/W/D} -+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), -+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), -+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), -+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), -+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), -+ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; -+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), -+ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; -+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), -+ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; -+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), -+ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; -+ -+// VBITREVI_{B/H/W/D} -+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (VBITREVI_B LSX128:$vj, uimm3:$imm)>; -+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (VBITREVI_H LSX128:$vj, uimm4:$imm)>; -+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (VBITREVI_W LSX128:$vj, uimm5:$imm)>; -+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (VBITREVI_D LSX128:$vj, uimm6:$imm)>; -+ -+// VFADD_{S/D} -+defm : PatVrVrF; -+ -+// VFSUB_{S/D} -+defm : PatVrVrF; -+ -+// VFMUL_{S/D} -+defm : PatVrVrF; -+ -+// VFDIV_{S/D} -+defm : PatVrVrF; -+ -+// VFMADD_{S/D} -+def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), -+ (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), -+ (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VINSGR2VR_{B/H/W/D} -+def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), -+ (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; -+def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm), -+ (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>; -+def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), -+ (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; -+def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), -+ (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; -+ -+// VPICKVE2GR_{B/H/W}[U] -+def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), -+ (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; -+def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16), -+ (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>; -+def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32), -+ (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>; -+ -+def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8), -+ (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>; -+def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16), -+ (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>; -+def : Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32), -+ (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>; -+ -+// VREPLGR2VR_{B/H/W/D} -+def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>; -+def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; -+def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; -+def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; -+ -+// VREPLVE_{B/H/W/D} -+def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), -+ (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk), -+ (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), -+ (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), -+ (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; -+ -+// Loads/Stores -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in { -+ defm : LdPat; -+ def : RegRegLdPat; -+ defm : StPat; -+ def : RegRegStPat; -+} -+ -+} // Predicates = [HasExtLSX] -+ -+/// Intrinsic pattern -+ -+class deriveLSXIntrinsic { -+ Intrinsic ret = !cast(!tolower("int_loongarch_lsx_"#Inst)); -+} -+ -+let Predicates = [HasExtLSX] in { -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj, vty:$vk), -+// (LAInst vty:$vj, vty:$vk)>; -+foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU", -+ "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU", -+ "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B", -+ "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU", -+ "VADDWEV_H_BU_B", "VADDWOD_H_BU_B", -+ "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU", -+ "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU", -+ "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU", -+ "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B", -+ "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", "VSRAR_B", -+ "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", -+ "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", -+ "VILVL_B", "VILVH_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", -+ "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", -+ "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", -+ "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", -+ "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", -+ "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", -+ "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", -+ "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", -+ "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", -+ "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", -+ "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", -+ "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", -+ "VSSRARN_BU_H", -+ "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", -+ "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", -+ "VILVL_H", "VILVH_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", -+ "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", -+ "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", -+ "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", -+ "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", -+ "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", -+ "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", -+ "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", -+ "VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", -+ "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", -+ "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", -+ "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", -+ "VSSRARN_HU_W", -+ "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", -+ "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", -+ "VILVL_W", "VILVH_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VADD_Q", "VSUB_Q", -+ "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", -+ "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", -+ "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", -+ "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", -+ "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", -+ "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", -+ "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", -+ "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", -+ "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", -+ "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", -+ "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", -+ "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", -+ "VSSRARN_WU_D", "VFFINT_S_L", -+ "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", -+ "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", -+ "VILVL_D", "VILVH_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -+// (LAInst vty:$vd, vty:$vj, vty:$vk)>; -+foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", -+ "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", -+ "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", -+ "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", -+ "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj), -+// (LAInst vty:$vj)>; -+foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", -+ "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", -+ "VCLO_B", "VCLZ_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", -+ "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", -+ "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", -+ "VFFINTL_D_W", "VFFINTH_D_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", -+ "VEXTL_Q_D", "VEXTL_QU_DU", -+ "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+ -+// Pat<(Intrinsic timm:$imm) -+// (LAInst timm:$imm)>; -+def : Pat<(int_loongarch_lsx_vldi timm:$imm), -+ (VLDI (to_valide_timm timm:$imm))>; -+foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret timm:$imm), -+ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vj, timm:$imm) -+// (LAInst vty:$vj, timm:$imm)>; -+foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", -+ "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", -+ "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", -+ "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", -+ "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", -+ "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", -+ "VREPLVEI_H", "VSHUF4I_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", -+ "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", -+ "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", -+ "VREPLVEI_W", "VSHUF4I_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", -+ "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", -+ "VPICKVE2GR_D", "VPICKVE2GR_DU", -+ "VREPLVEI_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) -+// (LAInst vty:$vd, vty:$vj, timm:$imm)>; -+foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", -+ "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H", -+ "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H", -+ "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", -+ "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", -+ "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", -+ "VFRSTPI_H", "VEXTRINS_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", -+ "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", -+ "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", -+ "VPERMI_W", "VEXTRINS_W"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", -+ "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", -+ "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", -+ "VSHUF4I_D", "VEXTRINS_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, -+ (to_valide_timm timm:$imm))>; -+ -+// vty: v16i8/v8i16/v4i32/v2i64 -+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -+// (LAInst vty:$vd, vty:$vj, vty:$vk)>; -+foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VFRSTP_H", "VSHUF_H"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), -+ (v4i32 LSX128:$vk)), -+ (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), -+ (v2i64 LSX128:$vk)), -+ (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va), -+// (LAInst vty:$vj, vty:$vk, vty:$va)>; -+foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj, vty:$vk), -+// (LAInst vty:$vj, vty:$vk)>; -+foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S", -+ "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S", -+ "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S", -+ "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S", -+ "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S", -+ "VFCMP_SLT_S", "VFCMP_SULT_S", "VFCMP_SLE_S", "VFCMP_SULE_S", -+ "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", -+ "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", -+ "VFTINT_W_D", -+ "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", -+ "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", -+ "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", -+ "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", -+ "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", -+ "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret -+ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), -+ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; -+ -+// vty: v4f32/v2f64 -+// Pat<(Intrinsic vty:$vj), -+// (LAInst vty:$vj)>; -+foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", -+ "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", -+ "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", -+ "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", -+ "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", -+ "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", -+ "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", -+ "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", -+ "VFTINTH_L_S"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", -+ "VFRINT_D", -+ "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", -+ "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", -+ "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in -+ def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), -+ (!cast(Inst) LSX128:$vj)>; -+ -+// load -+def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), -+ (VLD GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), -+ (VLDX GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), -+ (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), -+ (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), -+ (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), -+ (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ -+// store -+def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), -+ (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), -+ (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), -+ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+ - } // Predicates = [HasExtLSX] --- -2.20.1 - - -From 6f813b014a5df84162cc182994d597674d433a9a Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 19 Aug 2023 16:53:50 +0800 -Subject: [PATCH 03/35] [LoongArch] Add LASX intrinsic support - -This patch is similar to D155829. - -Depends on D155829 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155830 - -(cherry picked from commit 691f0d00b84f6ecaf8e341ef38256e939cca6b1e) ---- - llvm/include/llvm/IR/IntrinsicsLoongArch.td | 523 +++++++++++++ - .../LoongArch/LoongArchISelLowering.cpp | 402 +++++++++- - .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + - .../LoongArch/LoongArchLASXInstrInfo.td | 702 ++++++++++++++++++ - 4 files changed, 1633 insertions(+), 6 deletions(-) - -diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -index d39d8261ebe3..685deaec7709 100644 ---- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td -+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td -@@ -647,3 +647,526 @@ def int_loongarch_lsx_vstelm_d - [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; - - } // TargetPrefix = "loongarch" -+ -+//===----------------------------------------------------------------------===// -+// LASX -+ -+let TargetPrefix = "loongarch" in { -+foreach inst = ["xvadd_b", "xvsub_b", -+ "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", -+ "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", -+ "xvabsd_b", "xvabsd_bu", "xvadda_b", -+ "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", -+ "xvmul_b", "xvmuh_b", "xvmuh_bu", -+ "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", -+ "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", -+ "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", -+ "xvbitclr_b", "xvbitset_b", "xvbitrev_b", -+ "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", -+ "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", -+ "xvilvl_b", "xvilvh_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_h", "xvsub_h", -+ "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", -+ "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", -+ "xvabsd_h", "xvabsd_hu", "xvadda_h", -+ "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", -+ "xvmul_h", "xvmuh_h", "xvmuh_hu", -+ "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", -+ "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", -+ "xvbitclr_h", "xvbitset_h", "xvbitrev_h", -+ "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", -+ "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", -+ "xvilvl_h", "xvilvh_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_w", "xvsub_w", -+ "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", -+ "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", -+ "xvabsd_w", "xvabsd_wu", "xvadda_w", -+ "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", -+ "xvmul_w", "xvmuh_w", "xvmuh_wu", -+ "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", -+ "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", -+ "xvbitclr_w", "xvbitset_w", "xvbitrev_w", -+ "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", -+ "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", -+ "xvilvl_w", "xvilvh_w", "xvperm_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", -+ "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", -+ "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", -+ "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", -+ "xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", "xvsubwod_q_du", -+ "xvaddwev_q_du_d", "xvaddwod_q_du_d", -+ "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", -+ "xvabsd_d", "xvabsd_du", "xvadda_d", -+ "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", -+ "xvmul_d", "xvmuh_d", "xvmuh_du", -+ "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", -+ "xvmulwev_q_du_d", "xvmulwod_q_du_d", -+ "xvdiv_d", "xvdiv_du", "xvmod_d", "xvmod_du", "xvsigncov_d", -+ "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", -+ "xvbitclr_d", "xvbitset_d", "xvbitrev_d", -+ "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", -+ "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", -+ "xvilvl_d", "xvilvh_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvaddi_bu", "xvsubi_bu", -+ "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", -+ "xvsat_b", "xvsat_bu", -+ "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", -+ "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", -+ "xvsrlri_b", "xvsrari_b", -+ "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", -+ "xvseqi_b", "xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", -+ "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_hu", "xvsubi_hu", -+ "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", -+ "xvsat_h", "xvsat_hu", -+ "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", -+ "xvsrlri_h", "xvsrari_h", -+ "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", -+ "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", -+ "xvrepl128vei_h", "xvshuf4i_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_wu", "xvsubi_wu", -+ "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", -+ "xvsat_w", "xvsat_wu", -+ "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", -+ "xvsrlri_w", "xvsrari_w", -+ "xvbitclri_w", "xvbitseti_w", "xvbitrevi_w", -+ "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", -+ "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvaddi_du", "xvsubi_du", -+ "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", -+ "xvsat_d", "xvsat_du", -+ "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", -+ "xvsrlri_d", "xvsrari_d", -+ "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", -+ "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", -+ "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", -+ "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", -+ "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", -+ "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", -+ "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", -+ "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", -+ "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", -+ "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", -+ "xvaddwev_w_hu_h", "xvaddwod_w_hu_h", -+ "xvmulwev_w_h", "xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", -+ "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", -+ "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", -+ "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", -+ "xvaddwev_d_wu_w", "xvaddwod_d_wu_w", -+ "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", -+ "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", -+ "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", -+ "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", -+ "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", -+ "xvssrlrn_h_w", "xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", -+ "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", -+ "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", -+ "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", -+ "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", -+ "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v32i8_ty], -+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", -+ "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", -+ "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", -+ "xvfrstpi_h", "xvextrins_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", -+ "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", -+ "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", "xvssrarni_wu_d", -+ "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", -+ "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", -+ "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", -+ "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", -+ "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v16i16_ty], -+ [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", -+ "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8i32_ty], -+ [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", -+ "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", -+ "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4i64_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v32i8_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v16i16_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", -+ "xvclo_b", "xvclz_b", "xvpcnt_b", -+ "xvreplve0_b", "xvreplve0_q"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", -+ "xvreplve0_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", -+ "xvreplve0_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", -+ "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", -+ "xvreplve0_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vext2xv_w_b", "vext2xv_wu_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvinsgr2vr_w -+ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvinsgr2vr_d -+ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xvreplve_b -+ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_h -+ : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_w -+ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; -+def int_loongarch_lasx_xvreplve_d -+ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; -+ -+foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty], -+ [llvm_v8i32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty], -+ [llvm_v4i64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+// LASX Float -+ -+foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s", -+ "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d", -+ "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v8f32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in -+ def int_loongarch_lasx_#inst -+ : VecInt<[llvm_v4f64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", -+ "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", -+ "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvftintrne_w_s", "xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", -+ "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", -+ "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", -+ "xvftintrzh_l_s", "xvftintrpl_l_s", "xvftintrph_l_s", -+ "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", -+ "xvftinth_l_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvffint_s_l"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v4i64_ty, llvm_v4i64_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", -+ "xvftint_w_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcvt_h_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcvt_s_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", -+ "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", -+ "xvfcmp_cne_s", "xvfcmp_cor_s", "xvfcmp_cune_s", -+ "xvfcmp_saf_s", "xvfcmp_sun_s", "xvfcmp_seq_s", "xvfcmp_sueq_s", -+ "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", -+ "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], -+ [llvm_v8f32_ty, llvm_v8f32_ty], -+ [IntrNoMem]>; -+foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", -+ "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", -+ "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", -+ "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", -+ "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", -+ "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in -+ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], -+ [llvm_v4f64_ty, llvm_v4f64_ty], -+ [IntrNoMem]>; -+ -+def int_loongarch_lasx_xvpickve_w_f -+ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+def int_loongarch_lasx_xvpickve_d_f -+ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], -+ [IntrNoMem, ImmArg>]>; -+ -+// LASX load/store -+def int_loongarch_lasx_xvld -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldx -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], -+ [IntrReadMem, IntrArgMemOnly]>; -+def int_loongarch_lasx_xvldrepl_b -+ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_h -+ : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_w -+ : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvldrepl_d -+ : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; -+ -+def int_loongarch_lasx_xvst -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; -+def int_loongarch_lasx_xvstx -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], -+ [IntrWriteMem, IntrArgMemOnly]>; -+def int_loongarch_lasx_xvstelm_b -+ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_h -+ : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_w -+ : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+def int_loongarch_lasx_xvstelm_d -+ : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; -+} // TargetPrefix = "loongarch" -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index c05133647929..3a40cd06a3eb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -64,11 +64,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - static const MVT::SimpleValueType LSXVTs[] = { - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -+ static const MVT::SimpleValueType LASXVTs[] = { -+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; - - if (Subtarget.hasExtLSX()) - for (MVT VT : LSXVTs) - addRegisterClass(VT, &LoongArch::LSX128RegClass); - -+ if (Subtarget.hasExtLASX()) -+ for (MVT VT : LASXVTs) -+ addRegisterClass(VT, &LoongArch::LASX256RegClass); -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -@@ -207,6 +213,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); - -+ if (Subtarget.hasExtLASX()) -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -+ {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, -+ Legal); -+ - // Compute derived properties from the register classes. - computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -695,9 +706,17 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vpickve2gr_d: - case Intrinsic::loongarch_lsx_vpickve2gr_du: - case Intrinsic::loongarch_lsx_vreplvei_d: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_d: - return checkIntrinsicImmArg<1>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vreplvei_w: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_w: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_d: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_du: -+ case Intrinsic::loongarch_lasx_xvpickve_d: -+ case Intrinsic::loongarch_lasx_xvpickve_d_f: - return checkIntrinsicImmArg<2>(Op, 2, DAG); -+ case Intrinsic::loongarch_lasx_xvinsve0_d: -+ return checkIntrinsicImmArg<2>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_b: - case Intrinsic::loongarch_lsx_vsat_bu: - case Intrinsic::loongarch_lsx_vrotri_b: -@@ -706,7 +725,19 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vsrlri_b: - case Intrinsic::loongarch_lsx_vsrari_b: - case Intrinsic::loongarch_lsx_vreplvei_h: -+ case Intrinsic::loongarch_lasx_xvsat_b: -+ case Intrinsic::loongarch_lasx_xvsat_bu: -+ case Intrinsic::loongarch_lasx_xvrotri_b: -+ case Intrinsic::loongarch_lasx_xvsllwil_h_b: -+ case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: -+ case Intrinsic::loongarch_lasx_xvsrlri_b: -+ case Intrinsic::loongarch_lasx_xvsrari_b: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_h: -+ case Intrinsic::loongarch_lasx_xvpickve_w: -+ case Intrinsic::loongarch_lasx_xvpickve_w_f: - return checkIntrinsicImmArg<3>(Op, 2, DAG); -+ case Intrinsic::loongarch_lasx_xvinsve0_w: -+ return checkIntrinsicImmArg<3>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_h: - case Intrinsic::loongarch_lsx_vsat_hu: - case Intrinsic::loongarch_lsx_vrotri_h: -@@ -715,6 +746,14 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vsrlri_h: - case Intrinsic::loongarch_lsx_vsrari_h: - case Intrinsic::loongarch_lsx_vreplvei_b: -+ case Intrinsic::loongarch_lasx_xvsat_h: -+ case Intrinsic::loongarch_lasx_xvsat_hu: -+ case Intrinsic::loongarch_lasx_xvrotri_h: -+ case Intrinsic::loongarch_lasx_xvsllwil_w_h: -+ case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: -+ case Intrinsic::loongarch_lasx_xvsrlri_h: -+ case Intrinsic::loongarch_lasx_xvsrari_h: -+ case Intrinsic::loongarch_lasx_xvrepl128vei_b: - return checkIntrinsicImmArg<4>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vsrlni_b_h: - case Intrinsic::loongarch_lsx_vsrani_b_h: -@@ -728,6 +767,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_b_h: - case Intrinsic::loongarch_lsx_vssrlrni_bu_h: - case Intrinsic::loongarch_lsx_vssrarni_bu_h: -+ case Intrinsic::loongarch_lasx_xvsrlni_b_h: -+ case Intrinsic::loongarch_lasx_xvsrani_b_h: -+ case Intrinsic::loongarch_lasx_xvsrlrni_b_h: -+ case Intrinsic::loongarch_lasx_xvsrarni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrani_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlni_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrani_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrlrni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrarni_b_h: -+ case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: -+ case Intrinsic::loongarch_lasx_xvssrarni_bu_h: - return checkIntrinsicImmArg<4>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_w: - case Intrinsic::loongarch_lsx_vsat_wu: -@@ -746,6 +797,23 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vslti_du: - case Intrinsic::loongarch_lsx_vbsll_v: - case Intrinsic::loongarch_lsx_vbsrl_v: -+ case Intrinsic::loongarch_lasx_xvsat_w: -+ case Intrinsic::loongarch_lasx_xvsat_wu: -+ case Intrinsic::loongarch_lasx_xvrotri_w: -+ case Intrinsic::loongarch_lasx_xvsllwil_d_w: -+ case Intrinsic::loongarch_lasx_xvsllwil_du_wu: -+ case Intrinsic::loongarch_lasx_xvsrlri_w: -+ case Intrinsic::loongarch_lasx_xvsrari_w: -+ case Intrinsic::loongarch_lasx_xvslei_bu: -+ case Intrinsic::loongarch_lasx_xvslei_hu: -+ case Intrinsic::loongarch_lasx_xvslei_wu: -+ case Intrinsic::loongarch_lasx_xvslei_du: -+ case Intrinsic::loongarch_lasx_xvslti_bu: -+ case Intrinsic::loongarch_lasx_xvslti_hu: -+ case Intrinsic::loongarch_lasx_xvslti_wu: -+ case Intrinsic::loongarch_lasx_xvslti_du: -+ case Intrinsic::loongarch_lasx_xvbsll_v: -+ case Intrinsic::loongarch_lasx_xvbsrl_v: - return checkIntrinsicImmArg<5>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vseqi_b: - case Intrinsic::loongarch_lsx_vseqi_h: -@@ -759,6 +827,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vslti_h: - case Intrinsic::loongarch_lsx_vslti_w: - case Intrinsic::loongarch_lsx_vslti_d: -+ case Intrinsic::loongarch_lasx_xvseqi_b: -+ case Intrinsic::loongarch_lasx_xvseqi_h: -+ case Intrinsic::loongarch_lasx_xvseqi_w: -+ case Intrinsic::loongarch_lasx_xvseqi_d: -+ case Intrinsic::loongarch_lasx_xvslei_b: -+ case Intrinsic::loongarch_lasx_xvslei_h: -+ case Intrinsic::loongarch_lasx_xvslei_w: -+ case Intrinsic::loongarch_lasx_xvslei_d: -+ case Intrinsic::loongarch_lasx_xvslti_b: -+ case Intrinsic::loongarch_lasx_xvslti_h: -+ case Intrinsic::loongarch_lasx_xvslti_w: -+ case Intrinsic::loongarch_lasx_xvslti_d: - return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); - case Intrinsic::loongarch_lsx_vsrlni_h_w: - case Intrinsic::loongarch_lsx_vsrani_h_w: -@@ -774,12 +854,31 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_hu_w: - case Intrinsic::loongarch_lsx_vfrstpi_b: - case Intrinsic::loongarch_lsx_vfrstpi_h: -+ case Intrinsic::loongarch_lasx_xvsrlni_h_w: -+ case Intrinsic::loongarch_lasx_xvsrani_h_w: -+ case Intrinsic::loongarch_lasx_xvsrlrni_h_w: -+ case Intrinsic::loongarch_lasx_xvsrarni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrani_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlni_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrani_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrlrni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrarni_h_w: -+ case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: -+ case Intrinsic::loongarch_lasx_xvssrarni_hu_w: -+ case Intrinsic::loongarch_lasx_xvfrstpi_b: -+ case Intrinsic::loongarch_lasx_xvfrstpi_h: - return checkIntrinsicImmArg<5>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsat_d: - case Intrinsic::loongarch_lsx_vsat_du: - case Intrinsic::loongarch_lsx_vrotri_d: - case Intrinsic::loongarch_lsx_vsrlri_d: - case Intrinsic::loongarch_lsx_vsrari_d: -+ case Intrinsic::loongarch_lasx_xvsat_d: -+ case Intrinsic::loongarch_lasx_xvsat_du: -+ case Intrinsic::loongarch_lasx_xvrotri_d: -+ case Intrinsic::loongarch_lasx_xvsrlri_d: -+ case Intrinsic::loongarch_lasx_xvsrari_d: - return checkIntrinsicImmArg<6>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vsrlni_w_d: - case Intrinsic::loongarch_lsx_vsrani_w_d: -@@ -793,6 +892,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_w_d: - case Intrinsic::loongarch_lsx_vssrlrni_wu_d: - case Intrinsic::loongarch_lsx_vssrarni_wu_d: -+ case Intrinsic::loongarch_lasx_xvsrlni_w_d: -+ case Intrinsic::loongarch_lasx_xvsrani_w_d: -+ case Intrinsic::loongarch_lasx_xvsrlrni_w_d: -+ case Intrinsic::loongarch_lasx_xvsrarni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrani_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlni_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrani_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrlrni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrarni_w_d: -+ case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: -+ case Intrinsic::loongarch_lasx_xvssrarni_wu_d: - return checkIntrinsicImmArg<6>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vsrlni_d_q: - case Intrinsic::loongarch_lsx_vsrani_d_q: -@@ -806,11 +917,28 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vssrarni_d_q: - case Intrinsic::loongarch_lsx_vssrlrni_du_q: - case Intrinsic::loongarch_lsx_vssrarni_du_q: -+ case Intrinsic::loongarch_lasx_xvsrlni_d_q: -+ case Intrinsic::loongarch_lasx_xvsrani_d_q: -+ case Intrinsic::loongarch_lasx_xvsrlrni_d_q: -+ case Intrinsic::loongarch_lasx_xvsrarni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrani_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlni_du_q: -+ case Intrinsic::loongarch_lasx_xvssrani_du_q: -+ case Intrinsic::loongarch_lasx_xvssrlrni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrarni_d_q: -+ case Intrinsic::loongarch_lasx_xvssrlrni_du_q: -+ case Intrinsic::loongarch_lasx_xvssrarni_du_q: - return checkIntrinsicImmArg<7>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vnori_b: - case Intrinsic::loongarch_lsx_vshuf4i_b: - case Intrinsic::loongarch_lsx_vshuf4i_h: - case Intrinsic::loongarch_lsx_vshuf4i_w: -+ case Intrinsic::loongarch_lasx_xvnori_b: -+ case Intrinsic::loongarch_lasx_xvshuf4i_b: -+ case Intrinsic::loongarch_lasx_xvshuf4i_h: -+ case Intrinsic::loongarch_lasx_xvshuf4i_w: -+ case Intrinsic::loongarch_lasx_xvpermi_d: - return checkIntrinsicImmArg<8>(Op, 2, DAG); - case Intrinsic::loongarch_lsx_vshuf4i_d: - case Intrinsic::loongarch_lsx_vpermi_w: -@@ -819,13 +947,26 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - case Intrinsic::loongarch_lsx_vextrins_h: - case Intrinsic::loongarch_lsx_vextrins_w: - case Intrinsic::loongarch_lsx_vextrins_d: -+ case Intrinsic::loongarch_lasx_xvshuf4i_d: -+ case Intrinsic::loongarch_lasx_xvpermi_w: -+ case Intrinsic::loongarch_lasx_xvpermi_q: -+ case Intrinsic::loongarch_lasx_xvbitseli_b: -+ case Intrinsic::loongarch_lasx_xvextrins_b: -+ case Intrinsic::loongarch_lasx_xvextrins_h: -+ case Intrinsic::loongarch_lasx_xvextrins_w: -+ case Intrinsic::loongarch_lasx_xvextrins_d: - return checkIntrinsicImmArg<8>(Op, 3, DAG); - case Intrinsic::loongarch_lsx_vrepli_b: - case Intrinsic::loongarch_lsx_vrepli_h: - case Intrinsic::loongarch_lsx_vrepli_w: - case Intrinsic::loongarch_lsx_vrepli_d: -+ case Intrinsic::loongarch_lasx_xvrepli_b: -+ case Intrinsic::loongarch_lasx_xvrepli_h: -+ case Intrinsic::loongarch_lasx_xvrepli_w: -+ case Intrinsic::loongarch_lasx_xvrepli_d: - return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); - case Intrinsic::loongarch_lsx_vldi: -+ case Intrinsic::loongarch_lasx_xvldi: - return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); - } - } -@@ -924,22 +1065,27 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - } - case Intrinsic::loongarch_lsx_vld: - case Intrinsic::loongarch_lsx_vldrepl_b: -+ case Intrinsic::loongarch_lasx_xvld: -+ case Intrinsic::loongarch_lasx_xvldrepl_b: - return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_h: -+ case Intrinsic::loongarch_lasx_xvldrepl_h: - return !isShiftedInt<11, 1>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( - Op, "argument out of range or not a multiple of 2", DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_w: -+ case Intrinsic::loongarch_lasx_xvldrepl_w: - return !isShiftedInt<10, 2>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( - Op, "argument out of range or not a multiple of 4", DAG) - : SDValue(); - case Intrinsic::loongarch_lsx_vldrepl_d: -+ case Intrinsic::loongarch_lasx_xvldrepl_d: - return !isShiftedInt<9, 3>( - cast(Op.getOperand(3))->getSExtValue()) - ? emitIntrinsicWithChainErrorMessage( -@@ -1064,14 +1210,27 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - : Op; - } - case Intrinsic::loongarch_lsx_vst: -+ case Intrinsic::loongarch_lasx_xvst: - return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) - ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_b: -+ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<5>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_b: - return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) - ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_h: -+ return (!isShiftedInt<8, 1>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 2", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_h: - return (!isShiftedInt<8, 1>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1079,6 +1238,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - ? emitIntrinsicErrorMessage( - Op, "argument out of range or not a multiple of 2", DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_w: -+ return (!isShiftedInt<8, 2>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 4", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_w: - return (!isShiftedInt<8, 2>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1086,6 +1252,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - ? emitIntrinsicErrorMessage( - Op, "argument out of range or not a multiple of 4", DAG) - : SDValue(); -+ case Intrinsic::loongarch_lasx_xvstelm_d: -+ return (!isShiftedInt<8, 3>( -+ cast(Op.getOperand(4))->getSExtValue()) || -+ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) -+ ? emitIntrinsicErrorMessage( -+ Op, "argument out of range or not a multiple of 8", DAG) -+ : SDValue(); - case Intrinsic::loongarch_lsx_vstelm_d: - return (!isShiftedInt<8, 3>( - cast(Op.getOperand(4))->getSExtValue()) || -@@ -1304,6 +1477,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - LoongArchISD::VPICK_SEXT_ELT); - break; - case Intrinsic::loongarch_lsx_vpickve2gr_h: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_w: - replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, - LoongArchISD::VPICK_SEXT_ELT); - break; -@@ -1316,6 +1490,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - LoongArchISD::VPICK_ZEXT_ELT); - break; - case Intrinsic::loongarch_lsx_vpickve2gr_hu: -+ case Intrinsic::loongarch_lasx_xvpickve2gr_wu: - replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, - LoongArchISD::VPICK_ZEXT_ELT); - break; -@@ -1327,10 +1502,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - case Intrinsic::loongarch_lsx_bz_h: - case Intrinsic::loongarch_lsx_bz_w: - case Intrinsic::loongarch_lsx_bz_d: -+ case Intrinsic::loongarch_lasx_xbz_b: -+ case Intrinsic::loongarch_lasx_xbz_h: -+ case Intrinsic::loongarch_lasx_xbz_w: -+ case Intrinsic::loongarch_lasx_xbz_d: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VALL_ZERO); - break; - case Intrinsic::loongarch_lsx_bz_v: -+ case Intrinsic::loongarch_lasx_xbz_v: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VANY_ZERO); - break; -@@ -1338,10 +1518,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, - case Intrinsic::loongarch_lsx_bnz_h: - case Intrinsic::loongarch_lsx_bnz_w: - case Intrinsic::loongarch_lsx_bnz_d: -+ case Intrinsic::loongarch_lasx_xbnz_b: -+ case Intrinsic::loongarch_lasx_xbnz_h: -+ case Intrinsic::loongarch_lasx_xbnz_w: -+ case Intrinsic::loongarch_lasx_xbnz_d: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VALL_NONZERO); - break; - case Intrinsic::loongarch_lsx_bnz_v: -+ case Intrinsic::loongarch_lasx_xbnz_v: - replaceVecCondBranchResults(N, Results, DAG, Subtarget, - LoongArchISD::VANY_NONZERO); - break; -@@ -2114,30 +2299,50 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vadd_h: - case Intrinsic::loongarch_lsx_vadd_w: - case Intrinsic::loongarch_lsx_vadd_d: -+ case Intrinsic::loongarch_lasx_xvadd_b: -+ case Intrinsic::loongarch_lasx_xvadd_h: -+ case Intrinsic::loongarch_lasx_xvadd_w: -+ case Intrinsic::loongarch_lasx_xvadd_d: - return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vaddi_bu: - case Intrinsic::loongarch_lsx_vaddi_hu: - case Intrinsic::loongarch_lsx_vaddi_wu: - case Intrinsic::loongarch_lsx_vaddi_du: -+ case Intrinsic::loongarch_lasx_xvaddi_bu: -+ case Intrinsic::loongarch_lasx_xvaddi_hu: -+ case Intrinsic::loongarch_lasx_xvaddi_wu: -+ case Intrinsic::loongarch_lasx_xvaddi_du: - return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsub_b: - case Intrinsic::loongarch_lsx_vsub_h: - case Intrinsic::loongarch_lsx_vsub_w: - case Intrinsic::loongarch_lsx_vsub_d: -+ case Intrinsic::loongarch_lasx_xvsub_b: -+ case Intrinsic::loongarch_lasx_xvsub_h: -+ case Intrinsic::loongarch_lasx_xvsub_w: -+ case Intrinsic::loongarch_lasx_xvsub_d: - return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vsubi_bu: - case Intrinsic::loongarch_lsx_vsubi_hu: - case Intrinsic::loongarch_lsx_vsubi_wu: - case Intrinsic::loongarch_lsx_vsubi_du: -+ case Intrinsic::loongarch_lasx_xvsubi_bu: -+ case Intrinsic::loongarch_lasx_xvsubi_hu: -+ case Intrinsic::loongarch_lasx_xvsubi_wu: -+ case Intrinsic::loongarch_lasx_xvsubi_du: - return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vneg_b: - case Intrinsic::loongarch_lsx_vneg_h: - case Intrinsic::loongarch_lsx_vneg_w: - case Intrinsic::loongarch_lsx_vneg_d: -+ case Intrinsic::loongarch_lasx_xvneg_b: -+ case Intrinsic::loongarch_lasx_xvneg_h: -+ case Intrinsic::loongarch_lasx_xvneg_w: -+ case Intrinsic::loongarch_lasx_xvneg_d: - return DAG.getNode( - ISD::SUB, DL, N->getValueType(0), - DAG.getConstant( -@@ -2149,60 +2354,100 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vmax_h: - case Intrinsic::loongarch_lsx_vmax_w: - case Intrinsic::loongarch_lsx_vmax_d: -+ case Intrinsic::loongarch_lasx_xvmax_b: -+ case Intrinsic::loongarch_lasx_xvmax_h: -+ case Intrinsic::loongarch_lasx_xvmax_w: -+ case Intrinsic::loongarch_lasx_xvmax_d: - return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmax_bu: - case Intrinsic::loongarch_lsx_vmax_hu: - case Intrinsic::loongarch_lsx_vmax_wu: - case Intrinsic::loongarch_lsx_vmax_du: -+ case Intrinsic::loongarch_lasx_xvmax_bu: -+ case Intrinsic::loongarch_lasx_xvmax_hu: -+ case Intrinsic::loongarch_lasx_xvmax_wu: -+ case Intrinsic::loongarch_lasx_xvmax_du: - return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmaxi_b: - case Intrinsic::loongarch_lsx_vmaxi_h: - case Intrinsic::loongarch_lsx_vmaxi_w: - case Intrinsic::loongarch_lsx_vmaxi_d: -+ case Intrinsic::loongarch_lasx_xvmaxi_b: -+ case Intrinsic::loongarch_lasx_xvmaxi_h: -+ case Intrinsic::loongarch_lasx_xvmaxi_w: -+ case Intrinsic::loongarch_lasx_xvmaxi_d: - return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); - case Intrinsic::loongarch_lsx_vmaxi_bu: - case Intrinsic::loongarch_lsx_vmaxi_hu: - case Intrinsic::loongarch_lsx_vmaxi_wu: - case Intrinsic::loongarch_lsx_vmaxi_du: -+ case Intrinsic::loongarch_lasx_xvmaxi_bu: -+ case Intrinsic::loongarch_lasx_xvmaxi_hu: -+ case Intrinsic::loongarch_lasx_xvmaxi_wu: -+ case Intrinsic::loongarch_lasx_xvmaxi_du: - return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vmin_b: - case Intrinsic::loongarch_lsx_vmin_h: - case Intrinsic::loongarch_lsx_vmin_w: - case Intrinsic::loongarch_lsx_vmin_d: -+ case Intrinsic::loongarch_lasx_xvmin_b: -+ case Intrinsic::loongarch_lasx_xvmin_h: -+ case Intrinsic::loongarch_lasx_xvmin_w: -+ case Intrinsic::loongarch_lasx_xvmin_d: - return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmin_bu: - case Intrinsic::loongarch_lsx_vmin_hu: - case Intrinsic::loongarch_lsx_vmin_wu: - case Intrinsic::loongarch_lsx_vmin_du: -+ case Intrinsic::loongarch_lasx_xvmin_bu: -+ case Intrinsic::loongarch_lasx_xvmin_hu: -+ case Intrinsic::loongarch_lasx_xvmin_wu: -+ case Intrinsic::loongarch_lasx_xvmin_du: - return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmini_b: - case Intrinsic::loongarch_lsx_vmini_h: - case Intrinsic::loongarch_lsx_vmini_w: - case Intrinsic::loongarch_lsx_vmini_d: -+ case Intrinsic::loongarch_lasx_xvmini_b: -+ case Intrinsic::loongarch_lasx_xvmini_h: -+ case Intrinsic::loongarch_lasx_xvmini_w: -+ case Intrinsic::loongarch_lasx_xvmini_d: - return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); - case Intrinsic::loongarch_lsx_vmini_bu: - case Intrinsic::loongarch_lsx_vmini_hu: - case Intrinsic::loongarch_lsx_vmini_wu: - case Intrinsic::loongarch_lsx_vmini_du: -+ case Intrinsic::loongarch_lasx_xvmini_bu: -+ case Intrinsic::loongarch_lasx_xvmini_hu: -+ case Intrinsic::loongarch_lasx_xvmini_wu: -+ case Intrinsic::loongarch_lasx_xvmini_du: - return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vmul_b: - case Intrinsic::loongarch_lsx_vmul_h: - case Intrinsic::loongarch_lsx_vmul_w: - case Intrinsic::loongarch_lsx_vmul_d: -+ case Intrinsic::loongarch_lasx_xvmul_b: -+ case Intrinsic::loongarch_lasx_xvmul_h: -+ case Intrinsic::loongarch_lasx_xvmul_w: -+ case Intrinsic::loongarch_lasx_xvmul_d: - return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmadd_b: - case Intrinsic::loongarch_lsx_vmadd_h: - case Intrinsic::loongarch_lsx_vmadd_w: -- case Intrinsic::loongarch_lsx_vmadd_d: { -+ case Intrinsic::loongarch_lsx_vmadd_d: -+ case Intrinsic::loongarch_lasx_xvmadd_b: -+ case Intrinsic::loongarch_lasx_xvmadd_h: -+ case Intrinsic::loongarch_lasx_xvmadd_w: -+ case Intrinsic::loongarch_lasx_xvmadd_d: { - EVT ResTy = N->getValueType(0); - return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), - DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -@@ -2211,7 +2456,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vmsub_b: - case Intrinsic::loongarch_lsx_vmsub_h: - case Intrinsic::loongarch_lsx_vmsub_w: -- case Intrinsic::loongarch_lsx_vmsub_d: { -+ case Intrinsic::loongarch_lsx_vmsub_d: -+ case Intrinsic::loongarch_lasx_xvmsub_b: -+ case Intrinsic::loongarch_lasx_xvmsub_h: -+ case Intrinsic::loongarch_lasx_xvmsub_w: -+ case Intrinsic::loongarch_lasx_xvmsub_d: { - EVT ResTy = N->getValueType(0); - return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), - DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), -@@ -2221,125 +2470,188 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vdiv_h: - case Intrinsic::loongarch_lsx_vdiv_w: - case Intrinsic::loongarch_lsx_vdiv_d: -+ case Intrinsic::loongarch_lasx_xvdiv_b: -+ case Intrinsic::loongarch_lasx_xvdiv_h: -+ case Intrinsic::loongarch_lasx_xvdiv_w: -+ case Intrinsic::loongarch_lasx_xvdiv_d: - return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vdiv_bu: - case Intrinsic::loongarch_lsx_vdiv_hu: - case Intrinsic::loongarch_lsx_vdiv_wu: - case Intrinsic::loongarch_lsx_vdiv_du: -+ case Intrinsic::loongarch_lasx_xvdiv_bu: -+ case Intrinsic::loongarch_lasx_xvdiv_hu: -+ case Intrinsic::loongarch_lasx_xvdiv_wu: -+ case Intrinsic::loongarch_lasx_xvdiv_du: - return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmod_b: - case Intrinsic::loongarch_lsx_vmod_h: - case Intrinsic::loongarch_lsx_vmod_w: - case Intrinsic::loongarch_lsx_vmod_d: -+ case Intrinsic::loongarch_lasx_xvmod_b: -+ case Intrinsic::loongarch_lasx_xvmod_h: -+ case Intrinsic::loongarch_lasx_xvmod_w: -+ case Intrinsic::loongarch_lasx_xvmod_d: - return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vmod_bu: - case Intrinsic::loongarch_lsx_vmod_hu: - case Intrinsic::loongarch_lsx_vmod_wu: - case Intrinsic::loongarch_lsx_vmod_du: -+ case Intrinsic::loongarch_lasx_xvmod_bu: -+ case Intrinsic::loongarch_lasx_xvmod_hu: -+ case Intrinsic::loongarch_lasx_xvmod_wu: -+ case Intrinsic::loongarch_lasx_xvmod_du: - return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vand_v: -+ case Intrinsic::loongarch_lasx_xvand_v: - return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vor_v: -+ case Intrinsic::loongarch_lasx_xvor_v: - return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vxor_v: -+ case Intrinsic::loongarch_lasx_xvxor_v: - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); -- case Intrinsic::loongarch_lsx_vnor_v: { -+ case Intrinsic::loongarch_lsx_vnor_v: -+ case Intrinsic::loongarch_lasx_xvnor_v: { - SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - return DAG.getNOT(DL, Res, Res->getValueType(0)); - } - case Intrinsic::loongarch_lsx_vandi_b: -+ case Intrinsic::loongarch_lasx_xvandi_b: - return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vori_b: -+ case Intrinsic::loongarch_lasx_xvori_b: - return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vxori_b: -+ case Intrinsic::loongarch_lasx_xvxori_b: - return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<8>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsll_b: - case Intrinsic::loongarch_lsx_vsll_h: - case Intrinsic::loongarch_lsx_vsll_w: - case Intrinsic::loongarch_lsx_vsll_d: -+ case Intrinsic::loongarch_lasx_xvsll_b: -+ case Intrinsic::loongarch_lasx_xvsll_h: -+ case Intrinsic::loongarch_lasx_xvsll_w: -+ case Intrinsic::loongarch_lasx_xvsll_d: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vslli_b: -+ case Intrinsic::loongarch_lasx_xvslli_b: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vslli_h: -+ case Intrinsic::loongarch_lasx_xvslli_h: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vslli_w: -+ case Intrinsic::loongarch_lasx_xvslli_w: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vslli_d: -+ case Intrinsic::loongarch_lasx_xvslli_d: - return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrl_b: - case Intrinsic::loongarch_lsx_vsrl_h: - case Intrinsic::loongarch_lsx_vsrl_w: - case Intrinsic::loongarch_lsx_vsrl_d: -+ case Intrinsic::loongarch_lasx_xvsrl_b: -+ case Intrinsic::loongarch_lasx_xvsrl_h: -+ case Intrinsic::loongarch_lasx_xvsrl_w: -+ case Intrinsic::loongarch_lasx_xvsrl_d: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vsrli_b: -+ case Intrinsic::loongarch_lasx_xvsrli_b: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_h: -+ case Intrinsic::loongarch_lasx_xvsrli_h: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_w: -+ case Intrinsic::loongarch_lasx_xvsrli_w: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrli_d: -+ case Intrinsic::loongarch_lasx_xvsrli_d: - return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsra_b: - case Intrinsic::loongarch_lsx_vsra_h: - case Intrinsic::loongarch_lsx_vsra_w: - case Intrinsic::loongarch_lsx_vsra_d: -+ case Intrinsic::loongarch_lasx_xvsra_b: -+ case Intrinsic::loongarch_lasx_xvsra_h: -+ case Intrinsic::loongarch_lasx_xvsra_w: -+ case Intrinsic::loongarch_lasx_xvsra_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - truncateVecElts(N, DAG)); - case Intrinsic::loongarch_lsx_vsrai_b: -+ case Intrinsic::loongarch_lasx_xvsrai_b: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<3>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_h: -+ case Intrinsic::loongarch_lasx_xvsrai_h: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<4>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_w: -+ case Intrinsic::loongarch_lasx_xvsrai_w: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<5>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vsrai_d: -+ case Intrinsic::loongarch_lasx_xvsrai_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); - case Intrinsic::loongarch_lsx_vpcnt_b: - case Intrinsic::loongarch_lsx_vpcnt_h: - case Intrinsic::loongarch_lsx_vpcnt_w: - case Intrinsic::loongarch_lsx_vpcnt_d: -+ case Intrinsic::loongarch_lasx_xvpcnt_b: -+ case Intrinsic::loongarch_lasx_xvpcnt_h: -+ case Intrinsic::loongarch_lasx_xvpcnt_w: -+ case Intrinsic::loongarch_lasx_xvpcnt_d: - return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); - case Intrinsic::loongarch_lsx_vbitclr_b: - case Intrinsic::loongarch_lsx_vbitclr_h: - case Intrinsic::loongarch_lsx_vbitclr_w: - case Intrinsic::loongarch_lsx_vbitclr_d: -+ case Intrinsic::loongarch_lasx_xvbitclr_b: -+ case Intrinsic::loongarch_lasx_xvbitclr_h: -+ case Intrinsic::loongarch_lasx_xvbitclr_w: -+ case Intrinsic::loongarch_lasx_xvbitclr_d: - return lowerVectorBitClear(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_b: -+ case Intrinsic::loongarch_lasx_xvbitclri_b: - return lowerVectorBitClearImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_h: -+ case Intrinsic::loongarch_lasx_xvbitclri_h: - return lowerVectorBitClearImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_w: -+ case Intrinsic::loongarch_lasx_xvbitclri_w: - return lowerVectorBitClearImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitclri_d: -+ case Intrinsic::loongarch_lasx_xvbitclri_d: - return lowerVectorBitClearImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vbitset_b: - case Intrinsic::loongarch_lsx_vbitset_h: - case Intrinsic::loongarch_lsx_vbitset_w: -- case Intrinsic::loongarch_lsx_vbitset_d: { -+ case Intrinsic::loongarch_lsx_vbitset_d: -+ case Intrinsic::loongarch_lasx_xvbitset_b: -+ case Intrinsic::loongarch_lasx_xvbitset_h: -+ case Intrinsic::loongarch_lasx_xvbitset_w: -+ case Intrinsic::loongarch_lasx_xvbitset_d: { - EVT VecTy = N->getValueType(0); - SDValue One = DAG.getConstant(1, DL, VecTy); - return DAG.getNode( -@@ -2347,17 +2659,25 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); - } - case Intrinsic::loongarch_lsx_vbitseti_b: -+ case Intrinsic::loongarch_lasx_xvbitseti_b: - return lowerVectorBitSetImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_h: -+ case Intrinsic::loongarch_lasx_xvbitseti_h: - return lowerVectorBitSetImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_w: -+ case Intrinsic::loongarch_lasx_xvbitseti_w: - return lowerVectorBitSetImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitseti_d: -+ case Intrinsic::loongarch_lasx_xvbitseti_d: - return lowerVectorBitSetImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrev_b: - case Intrinsic::loongarch_lsx_vbitrev_h: - case Intrinsic::loongarch_lsx_vbitrev_w: -- case Intrinsic::loongarch_lsx_vbitrev_d: { -+ case Intrinsic::loongarch_lsx_vbitrev_d: -+ case Intrinsic::loongarch_lasx_xvbitrev_b: -+ case Intrinsic::loongarch_lasx_xvbitrev_h: -+ case Intrinsic::loongarch_lasx_xvbitrev_w: -+ case Intrinsic::loongarch_lasx_xvbitrev_d: { - EVT VecTy = N->getValueType(0); - SDValue One = DAG.getConstant(1, DL, VecTy); - return DAG.getNode( -@@ -2365,31 +2685,45 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); - } - case Intrinsic::loongarch_lsx_vbitrevi_b: -+ case Intrinsic::loongarch_lasx_xvbitrevi_b: - return lowerVectorBitRevImm<3>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_h: -+ case Intrinsic::loongarch_lasx_xvbitrevi_h: - return lowerVectorBitRevImm<4>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_w: -+ case Intrinsic::loongarch_lasx_xvbitrevi_w: - return lowerVectorBitRevImm<5>(N, DAG); - case Intrinsic::loongarch_lsx_vbitrevi_d: -+ case Intrinsic::loongarch_lasx_xvbitrevi_d: - return lowerVectorBitRevImm<6>(N, DAG); - case Intrinsic::loongarch_lsx_vfadd_s: - case Intrinsic::loongarch_lsx_vfadd_d: -+ case Intrinsic::loongarch_lasx_xvfadd_s: -+ case Intrinsic::loongarch_lasx_xvfadd_d: - return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfsub_s: - case Intrinsic::loongarch_lsx_vfsub_d: -+ case Intrinsic::loongarch_lasx_xvfsub_s: -+ case Intrinsic::loongarch_lasx_xvfsub_d: - return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfmul_s: - case Intrinsic::loongarch_lsx_vfmul_d: -+ case Intrinsic::loongarch_lasx_xvfmul_s: -+ case Intrinsic::loongarch_lasx_xvfmul_d: - return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfdiv_s: - case Intrinsic::loongarch_lsx_vfdiv_d: -+ case Intrinsic::loongarch_lasx_xvfdiv_s: -+ case Intrinsic::loongarch_lasx_xvfdiv_d: - return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2)); - case Intrinsic::loongarch_lsx_vfmadd_s: - case Intrinsic::loongarch_lsx_vfmadd_d: -+ case Intrinsic::loongarch_lasx_xvfmadd_s: -+ case Intrinsic::loongarch_lasx_xvfmadd_d: - return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3)); - case Intrinsic::loongarch_lsx_vinsgr2vr_b: -@@ -2397,10 +2731,12 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); - case Intrinsic::loongarch_lsx_vinsgr2vr_h: -+ case Intrinsic::loongarch_lasx_xvinsgr2vr_w: - return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); - case Intrinsic::loongarch_lsx_vinsgr2vr_w: -+ case Intrinsic::loongarch_lasx_xvinsgr2vr_d: - return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), - legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); -@@ -2411,7 +2747,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vreplgr2vr_b: - case Intrinsic::loongarch_lsx_vreplgr2vr_h: - case Intrinsic::loongarch_lsx_vreplgr2vr_w: -- case Intrinsic::loongarch_lsx_vreplgr2vr_d: { -+ case Intrinsic::loongarch_lsx_vreplgr2vr_d: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_b: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_h: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_w: -+ case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { - EVT ResTy = N->getValueType(0); - SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); - return DAG.getBuildVector(ResTy, DL, Ops); -@@ -2420,6 +2760,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lsx_vreplve_h: - case Intrinsic::loongarch_lsx_vreplve_w: - case Intrinsic::loongarch_lsx_vreplve_d: -+ case Intrinsic::loongarch_lasx_xvreplve_b: -+ case Intrinsic::loongarch_lasx_xvreplve_h: -+ case Intrinsic::loongarch_lasx_xvreplve_w: -+ case Intrinsic::loongarch_lasx_xvreplve_d: - return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), - N->getOperand(1), - DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), -@@ -2534,6 +2878,36 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, - case LoongArch::PseudoVBNZ_D: - CondOpc = LoongArch::VSETALLNEZ_D; - break; -+ case LoongArch::PseudoXVBZ: -+ CondOpc = LoongArch::XVSETEQZ_V; -+ break; -+ case LoongArch::PseudoXVBZ_B: -+ CondOpc = LoongArch::XVSETANYEQZ_B; -+ break; -+ case LoongArch::PseudoXVBZ_H: -+ CondOpc = LoongArch::XVSETANYEQZ_H; -+ break; -+ case LoongArch::PseudoXVBZ_W: -+ CondOpc = LoongArch::XVSETANYEQZ_W; -+ break; -+ case LoongArch::PseudoXVBZ_D: -+ CondOpc = LoongArch::XVSETANYEQZ_D; -+ break; -+ case LoongArch::PseudoXVBNZ: -+ CondOpc = LoongArch::XVSETNEZ_V; -+ break; -+ case LoongArch::PseudoXVBNZ_B: -+ CondOpc = LoongArch::XVSETALLNEZ_B; -+ break; -+ case LoongArch::PseudoXVBNZ_H: -+ CondOpc = LoongArch::XVSETALLNEZ_H; -+ break; -+ case LoongArch::PseudoXVBNZ_W: -+ CondOpc = LoongArch::XVSETALLNEZ_W; -+ break; -+ case LoongArch::PseudoXVBNZ_D: -+ CondOpc = LoongArch::XVSETALLNEZ_D; -+ break; - } - - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -2636,6 +3010,16 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - case LoongArch::PseudoVBNZ_H: - case LoongArch::PseudoVBNZ_W: - case LoongArch::PseudoVBNZ_D: -+ case LoongArch::PseudoXVBZ: -+ case LoongArch::PseudoXVBZ_B: -+ case LoongArch::PseudoXVBZ_H: -+ case LoongArch::PseudoXVBZ_W: -+ case LoongArch::PseudoXVBZ_D: -+ case LoongArch::PseudoXVBNZ: -+ case LoongArch::PseudoXVBNZ_B: -+ case LoongArch::PseudoXVBNZ_H: -+ case LoongArch::PseudoXVBNZ_W: -+ case LoongArch::PseudoXVBNZ_D: - return emitVecCondBranchPseudo(MI, BB, Subtarget); - } - } -@@ -2746,6 +3130,10 @@ const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, - LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, - LoongArch::VR6, LoongArch::VR7}; - -+const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, -+ LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, -+ LoongArch::XR6, LoongArch::XR7}; -+ - // Pass a 2*GRLen argument that has been split into two GRLen values through - // registers or the stack as necessary. - static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, -@@ -2894,6 +3282,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - Reg = State.AllocateReg(ArgFPR64s); - else if (ValVT.is128BitVector()) - Reg = State.AllocateReg(ArgVRs); -+ else if (ValVT.is256BitVector()) -+ Reg = State.AllocateReg(ArgXRs); - else - Reg = State.AllocateReg(ArgGPRs); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index a5d66ebac96a..ddd1c9943fac 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -55,6 +55,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - return; - } - -+ // XR->XR copies. -+ if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) { -+ BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg) -+ .addReg(SrcReg, getKillRegState(KillSrc)) -+ .addImm(0); -+ return; -+ } -+ - // GPR->CFR copy. - if (LoongArch::CFRRegClass.contains(DstReg) && - LoongArch::GPRRegClass.contains(SrcReg)) { -@@ -109,6 +117,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( - Opcode = LoongArch::FST_D; - else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::VST; -+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::XVST; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoST_CFR; - else -@@ -145,6 +155,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - Opcode = LoongArch::FLD_D; - else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) - Opcode = LoongArch::VLD; -+ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) -+ Opcode = LoongArch::XVLD; - else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) - Opcode = LoongArch::PseudoLD_CFR; - else -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index dc37b37b2186..a3afd4789dfc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -10,6 +10,30 @@ - // - //===----------------------------------------------------------------------===// - -+def lasxsplati8 -+ : PatFrag<(ops node:$e0), -+ (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati16 -+ : PatFrag<(ops node:$e0), -+ (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati32 -+ : PatFrag<(ops node:$e0), -+ (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplati64 -+ : PatFrag<(ops node:$e0), -+ (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -+ - //===----------------------------------------------------------------------===// - // Instruction class templates - //===----------------------------------------------------------------------===// -@@ -1029,4 +1053,682 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [], - "xvrepli.d", "$xd, $imm">; - } - -+def PseudoXVBNZ_B : VecCond; -+def PseudoXVBNZ_H : VecCond; -+def PseudoXVBNZ_W : VecCond; -+def PseudoXVBNZ_D : VecCond; -+def PseudoXVBNZ : VecCond; -+ -+def PseudoXVBZ_B : VecCond; -+def PseudoXVBZ_H : VecCond; -+def PseudoXVBZ_W : VecCond; -+def PseudoXVBZ_D : VecCond; -+def PseudoXVBZ : VecCond; -+ -+} // Predicates = [HasExtLASX] -+ -+multiclass PatXr { -+ def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))), -+ (!cast(Inst#"_B") LASX256:$xj)>; -+ def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))), -+ (!cast(Inst#"_H") LASX256:$xj)>; -+ def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))), -+ (!cast(Inst#"_W") LASX256:$xj)>; -+ def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))), -+ (!cast(Inst#"_D") LASX256:$xj)>; -+} -+ -+multiclass PatXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrXrF { -+ def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), -+ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrXrU { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatXrSimm5 { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))), -+ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; -+} -+ -+multiclass PatXrUimm5 { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; -+} -+ -+multiclass PatXrXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), -+ (v32i8 LASX256:$xk)), -+ (!cast(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), -+ (v16i16 LASX256:$xk)), -+ (!cast(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), -+ (v8i32 LASX256:$xk)), -+ (!cast(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), -+ (v4i64 LASX256:$xk)), -+ (!cast(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatShiftXrXr { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, -+ (v32i8 LASX256:$xk))), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15, -+ (v16i16 LASX256:$xk))), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31, -+ (v8i32 LASX256:$xk))), -+ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63, -+ (v4i64 LASX256:$xk))), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatShiftXrUimm { -+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))), -+ (!cast(Inst#"_B") LASX256:$xj, uimm3:$imm)>; -+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))), -+ (!cast(Inst#"_H") LASX256:$xj, uimm4:$imm)>; -+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), -+ (!cast(Inst#"_W") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))), -+ (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; -+} -+ -+class PatXrXrB -+ : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (Inst LASX256:$xj, LASX256:$xk)>; -+ -+let Predicates = [HasExtLASX] in { -+ -+// XVADD_{B/H/W/D} -+defm : PatXrXr; -+// XVSUB_{B/H/W/D} -+defm : PatXrXr; -+ -+// XVADDI_{B/H/W/D}U -+defm : PatXrUimm5; -+// XVSUBI_{B/H/W/D}U -+defm : PatXrUimm5; -+ -+// XVNEG_{B/H/W/D} -+def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>; -+def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>; -+ -+// XVMAX[I]_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+defm : PatXrSimm5; -+defm : PatXrUimm5; -+ -+// XVMIN[I]_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+defm : PatXrSimm5; -+defm : PatXrUimm5; -+ -+// XVMUL_{B/H/W/D} -+defm : PatXrXr; -+ -+// XVMADD_{B/H/W/D} -+defm : PatXrXrXr; -+// XVMSUB_{B/H/W/D} -+defm : PatXrXrXr; -+ -+// XVDIV_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ -+// XVMOD_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ -+// XVAND_V -+def : PatXrXrB; -+// XVNOR_V -+def : PatXrXrB; -+// XVXOR_V -+def : PatXrXrB; -+// XVNOR_V -+def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), -+ (XVNOR_V LASX256:$xj, LASX256:$xk)>; -+ -+// XVANDI_B -+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVANDI_B LASX256:$xj, uimm8:$imm)>; -+// XVORI_B -+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVORI_B LASX256:$xj, uimm8:$imm)>; -+ -+// XVXORI_B -+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), -+ (XVXORI_B LASX256:$xj, uimm8:$imm)>; -+ -+// XVSLL[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVSRL[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVSRA[I]_{B/H/W/D} -+defm : PatXrXr; -+defm : PatShiftXrXr; -+defm : PatShiftXrUimm; -+ -+// XVPCNT_{B/H/W/D} -+defm : PatXr; -+ -+// XVBITCLR_{B/H/W/D} -+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))), -+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))), -+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))), -+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))), -+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati8imm7 v32i8:$xk)))), -+ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati16imm15 v16i16:$xk)))), -+ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati32imm31 v8i32:$xk)))), -+ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, -+ (vsplati64imm63 v4i64:$xk)))), -+ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITCLRI_{B/H/W/D} -+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), -+ (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), -+ (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), -+ (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), -+ (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVBITSET_{B/H/W/D} -+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), -+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), -+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), -+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), -+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), -+ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), -+ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), -+ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), -+ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITSETI_{B/H/W/D} -+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (XVBITSETI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (XVBITSETI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (XVBITSETI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (XVBITSETI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVBITREV_{B/H/W/D} -+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), -+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), -+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), -+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), -+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; -+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), -+ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; -+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), -+ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; -+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), -+ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; -+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), -+ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; -+ -+// XVBITREVI_{B/H/W/D} -+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), -+ (XVBITREVI_B LASX256:$xj, uimm3:$imm)>; -+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), -+ (XVBITREVI_H LASX256:$xj, uimm4:$imm)>; -+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), -+ (XVBITREVI_W LASX256:$xj, uimm5:$imm)>; -+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), -+ (XVBITREVI_D LASX256:$xj, uimm6:$imm)>; -+ -+// XVFADD_{S/D} -+defm : PatXrXrF; -+ -+// XVFSUB_{S/D} -+defm : PatXrXrF; -+ -+// XVFMUL_{S/D} -+defm : PatXrXrF; -+ -+// XVFDIV_{S/D} -+defm : PatXrXrF; -+ -+// XVFMADD_{S/D} -+def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), -+ (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), -+ (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVINSGR2VR_{W/D} -+def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), -+ (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; -+def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), -+ (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; -+ -+// XVPICKVE2GR_W[U] -+def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), -+ (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; -+def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32), -+ (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>; -+ -+// XVREPLGR2VR_{B/H/W/D} -+def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>; -+def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>; -+def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>; -+def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>; -+ -+// XVREPLVE_{B/H/W/D} -+def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk), -+ (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk), -+ (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), -+ (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>; -+def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), -+ (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; -+ -+// Loads/Stores -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in { -+ defm : LdPat; -+ def : RegRegLdPat; -+ defm : StPat; -+ def : RegRegStPat; -+} -+ -+} // Predicates = [HasExtLASX] -+ -+/// Intrinsic pattern -+ -+class deriveLASXIntrinsic { -+ Intrinsic ret = !cast(!tolower("int_loongarch_lasx_"#Inst)); -+} -+ -+let Predicates = [HasExtLASX] in { -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj, vty:$xk), -+// (LAInst vty:$xj, vty:$xk)>; -+foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU", -+ "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU", -+ "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B", -+ "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU", -+ "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B", -+ "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU", -+ "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU", -+ "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU", -+ "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B", -+ "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B", -+ "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU", -+ "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B", -+ "XVILVL_B", "XVILVH_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU", -+ "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU", -+ "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H", -+ "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU", -+ "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H", -+ "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU", -+ "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU", -+ "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU", -+ "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H", -+ "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H", -+ "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H", -+ "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H", -+ "XVSSRARN_BU_H", -+ "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU", -+ "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H", -+ "XVILVL_H", "XVILVH_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU", -+ "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU", -+ "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W", -+ "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU", -+ "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W", -+ "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU", -+ "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU", -+ "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU", -+ "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W", -+ "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W", -+ "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W", -+ "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", -+ "XVSSRARN_HU_W", -+ "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", -+ "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", -+ "XVILVL_W", "XVILVH_W", "XVPERM_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVADD_Q", "XVSUB_Q", -+ "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", -+ "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", -+ "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", -+ "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", -+ "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", -+ "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", -+ "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", -+ "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", -+ "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", -+ "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", -+ "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", -+ "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", -+ "XVSSRARN_WU_D", "XVFFINT_S_L", -+ "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU", -+ "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", -+ "XVILVL_D", "XVILVH_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -+// (LAInst vty:$xd, vty:$xj, vty:$xk)>; -+foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", -+ "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", -+ "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", -+ "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", -+ "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj), -+// (LAInst vty:$xj)>; -+foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", -+ "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", -+ "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", -+ "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", -+ "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", -+ "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", -+ "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", -+ "VEXT2XV_DU_HU", "XVREPLVE0_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", -+ "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", -+ "XVFFINTL_D_W", "XVFFINTH_D_W", -+ "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", -+ "XVEXTL_Q_D", "XVEXTL_QU_DU", -+ "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", -+ "XVREPLVE0_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+ -+// Pat<(Intrinsic timm:$imm) -+// (LAInst timm:$imm)>; -+def : Pat<(int_loongarch_lasx_xvldi timm:$imm), -+ (XVLDI (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret timm:$imm), -+ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xj, timm:$imm) -+// (LAInst vty:$xj, timm:$imm)>; -+foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", -+ "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", -+ "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", -+ "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", -+ "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", -+ "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", -+ "XVREPL128VEI_H", "XVSHUF4I_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", -+ "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", -+ "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", -+ "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", -+ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", -+ "XVPICKVE2GR_D", "XVPICKVE2GR_DU", -+ "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) -+// (LAInst vty:$xd, vty:$xj, timm:$imm)>; -+foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", -+ "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", -+ "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", -+ "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", -+ "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", -+ "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", -+ "XVFRSTPI_H", "XVEXTRINS_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", -+ "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", -+ "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", -+ "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", -+ "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", -+ "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", -+ "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, -+ (to_valide_timm timm:$imm))>; -+ -+// vty: v32i8/v16i16/v8i32/v4i64 -+// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -+// (LAInst vty:$xd, vty:$xj, vty:$xk)>; -+foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), -+ (v8i32 LASX256:$xk)), -+ (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), -+ (v4i64 LASX256:$xk)), -+ (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa), -+// (LAInst vty:$xj, vty:$xk, vty:$xa)>; -+foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj, vty:$xk), -+// (LAInst vty:$xj, vty:$xk)>; -+foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S", -+ "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S", -+ "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S", -+ "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S", -+ "XVFCMP_SAF_S", "XVFCMP_SUN_S", "XVFCMP_SEQ_S", "XVFCMP_SUEQ_S", -+ "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S", -+ "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D", -+ "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D", -+ "XVFTINT_W_D", -+ "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D", -+ "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D", -+ "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D", -+ "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D", -+ "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D", -+ "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret -+ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), -+ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; -+ -+// vty: v8f32/v4f64 -+// Pat<(Intrinsic vty:$xj), -+// (LAInst vty:$xj)>; -+foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S", -+ "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S", -+ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S", -+ "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S", -+ "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S", -+ "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S", -+ "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S", -+ "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S", -+ "XVFTINTH_L_S"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D", -+ "XVFRINT_D", -+ "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D", -+ "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D", -+ "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in -+ def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), -+ (!cast(Inst) LASX256:$xj)>; -+ -+def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), -+ (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), -+ (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; -+ -+// load -+def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), -+ (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), -+ (XVLDX GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), -+ (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), -+ (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), -+ (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), -+ (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ -+// store -+def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), -+ (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; -+def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), -+ (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; -+ -+def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), -+ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), -+ (to_valide_timm timm:$idx))>; -+ - } // Predicates = [HasExtLASX] --- -2.20.1 - - -From 76928242b8b8e6228d1b1ec80c69b61c94d6ec79 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:10:41 +0800 -Subject: [PATCH 04/35] [LoongArch] Add LSX intrinsic testcases - -Depends on D155829 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155834 - -(cherry picked from commit f3aa4416319aed198841401c6c9dc2e49afe2507) ---- - .../CodeGen/LoongArch/lsx/intrinsic-absd.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-add.ll | 62 ++ - .../CodeGen/LoongArch/lsx/intrinsic-adda.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-addi.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-addw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lsx/intrinsic-and.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-andi.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-andn.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-avg.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-avgr.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-bitclr.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-bitrev.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-bitsel.ll | 14 + - .../LoongArch/lsx/intrinsic-bitseli.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-bitset.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-bsll.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-bsrl.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-clo.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-clz.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-div.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-exth.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-extl.ll | 26 + - .../LoongArch/lsx/intrinsic-extrins.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-fadd.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fclass.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ - .../CodeGen/LoongArch/lsx/intrinsic-fcvt.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fcvth.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fdiv.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-ffint.ll | 86 +++ - .../CodeGen/LoongArch/lsx/intrinsic-flogb.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmadd.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmax.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmin.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmina.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmsub.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fmul.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-frecip.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-frint.ll | 122 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-frstp.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-fsub.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-ftint.ll | 350 ++++++++++++ - .../CodeGen/LoongArch/lsx/intrinsic-haddw.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-hsubw.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-ilv.ll | 98 ++++ - .../LoongArch/lsx/intrinsic-insgr2vr.ll | 54 ++ - .../CodeGen/LoongArch/lsx/intrinsic-ld.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-ldi.ll | 62 ++ - .../CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-madd.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-maddw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lsx/intrinsic-max.ll | 194 +++++++ - .../CodeGen/LoongArch/lsx/intrinsic-min.ll | 194 +++++++ - .../CodeGen/LoongArch/lsx/intrinsic-mod.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-mskgez.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-mskltz.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-msknz.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-msub.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-muh.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-mul.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-mulw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lsx/intrinsic-neg.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-nor.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-nori.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-or.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-ori.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-orn.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-pack.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-pcnt.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-permi.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-pick.ll | 98 ++++ - .../LoongArch/lsx/intrinsic-pickve2gr.ll | 98 ++++ - .../LoongArch/lsx/intrinsic-replgr2vr.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-replve.ll | 50 ++ - .../LoongArch/lsx/intrinsic-replvei.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-rotr.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-sadd.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-sat.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-seq.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-set.ll | 38 ++ - .../LoongArch/lsx/intrinsic-setallnez.ll | 74 +++ - .../LoongArch/lsx/intrinsic-setanyeqz.ll | 74 +++ - .../CodeGen/LoongArch/lsx/intrinsic-shuf.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll | 50 ++ - .../LoongArch/lsx/intrinsic-signcov.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-sle.ll | 194 +++++++ - .../CodeGen/LoongArch/lsx/intrinsic-sll.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-sllwil.ll | 74 +++ - .../CodeGen/LoongArch/lsx/intrinsic-slt.ll | 194 +++++++ - .../CodeGen/LoongArch/lsx/intrinsic-sra.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-sran.ll | 38 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srani.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srar.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-srarn.ll | 38 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srarni.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srl.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-srln.ll | 38 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srlni.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srlr.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-srlrn.ll | 38 ++ - .../CodeGen/LoongArch/lsx/intrinsic-srlrni.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-ssran.ll | 74 +++ - .../CodeGen/LoongArch/lsx/intrinsic-ssrani.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll | 74 +++ - .../LoongArch/lsx/intrinsic-ssrarni.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-ssrln.ll | 74 +++ - .../CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll | 74 +++ - .../LoongArch/lsx/intrinsic-ssrlrni.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-ssub.ll | 98 ++++ - .../CodeGen/LoongArch/lsx/intrinsic-st.ll | 26 + - .../CodeGen/LoongArch/lsx/intrinsic-stelm.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-sub.ll | 62 ++ - .../CodeGen/LoongArch/lsx/intrinsic-subi.ll | 50 ++ - .../CodeGen/LoongArch/lsx/intrinsic-subw.ll | 194 +++++++ - .../CodeGen/LoongArch/lsx/intrinsic-xor.ll | 14 + - .../CodeGen/LoongArch/lsx/intrinsic-xori.ll | 14 + - 123 files changed, 8902 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll -new file mode 100644 -index 000000000000..811d9d712de4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vabsd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll -new file mode 100644 -index 000000000000..fac16c8308da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadd_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll -new file mode 100644 -index 000000000000..79be0a184bfb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vadda_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll -new file mode 100644 -index 000000000000..b9134e0724fe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vaddi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll -new file mode 100644 -index 000000000000..086e3bec12d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll -new file mode 100644 -index 000000000000..77496239c3a9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vand_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll -new file mode 100644 -index 000000000000..9a1c38a641d0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vandi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vandi.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll -new file mode 100644 -index 000000000000..b08c759ecc32 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vandn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll -new file mode 100644 -index 000000000000..fb0861f4cd5e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavg_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll -new file mode 100644 -index 000000000000..8bf7d0ed8817 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vavgr_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vavgr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll -new file mode 100644 -index 000000000000..f5fba6dbb141 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitclr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitclri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll -new file mode 100644 -index 000000000000..ad56e88fdb88 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitrev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitrevi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll -new file mode 100644 -index 000000000000..4b4b5ff1fc8c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vbitsel_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll -new file mode 100644 -index 000000000000..28d342b5c378 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitseli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll -new file mode 100644 -index 000000000000..75d98e6f8bce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vbitset_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vbitset_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vbitseti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll -new file mode 100644 -index 000000000000..e7eb1cfcb407 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbsll_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll -new file mode 100644 -index 000000000000..fe0565297641 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vbsrl_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll -new file mode 100644 -index 000000000000..c581109f3fd0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vclo_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclo.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll -new file mode 100644 -index 000000000000..25c37b64349b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vclz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vclz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll -new file mode 100644 -index 000000000000..53166e84d269 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vdiv_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll -new file mode 100644 -index 000000000000..2f3e891a9eef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) -+ -+define <8 x i16> @lsx_vexth_h_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.h.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) -+ -+define <4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.w.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) -+ -+define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.q.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) -+ -+define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) -+ -+define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) -+ -+define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.du.wu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) -+ -+define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vexth_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vexth.qu.du $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll -new file mode 100644 -index 000000000000..cbf19e2a3919 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vextl_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextl.q.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) -+ -+define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vextl_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextl.qu.du $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll -new file mode 100644 -index 000000000000..8f03a2b81291 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vextrins_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll -new file mode 100644 -index 000000000000..569002314c92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll -new file mode 100644 -index 000000000000..0c6682187101 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) -+ -+define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfclass_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfclass.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) -+ -+define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfclass_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfclass.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll -new file mode 100644 -index 000000000000..669c53b73b16 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll -@@ -0,0 +1,530 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_caf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_caf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_ceq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_ceq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_clt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_clt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_cune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_saf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_saf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_seq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_seq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sueq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_slt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_slt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sule.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sne.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sne.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sor.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sor.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>) -+ -+define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sune.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>) -+ -+define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcmp_sune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcmp.sune.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll -new file mode 100644 -index 000000000000..a6a151a96d84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) -+ -+define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcvt_h_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvt.h.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) -+ -+define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfcvt_s_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll -new file mode 100644 -index 000000000000..a9e4328bd011 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) -+ -+define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvth_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvth.s.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>) -+ -+define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvth_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll -new file mode 100644 -index 000000000000..9a69964bb227 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) -+ -+define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvtl_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvtl.s.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>) -+ -+define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfcvtl_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfcvtl.d.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll -new file mode 100644 -index 000000000000..1ca8e5e2c0e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfdiv_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfdiv.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfdiv.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll -new file mode 100644 -index 000000000000..62fbcfa339cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) -+ -+define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_s_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>) -+ -+define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_d_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.d.l $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>) -+ -+define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_s_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.wu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>) -+ -+define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vffint_d_lu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.d.lu $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va) -+ ret <2 x double> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>) -+ -+define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffintl_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffintl.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va) -+ ret <2 x double> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>) -+ -+define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vffinth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffinth.d.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>) -+ -+define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vffint_s_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vffint.s.l $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll -new file mode 100644 -index 000000000000..d8382acc70ed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) -+ -+define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vflogb_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vflogb.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>) -+ -+define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vflogb_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vflogb.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll -new file mode 100644 -index 000000000000..adbaf6c76b1b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll -new file mode 100644 -index 000000000000..89f757c4e456 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmax_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll -new file mode 100644 -index 000000000000..5662acc0b9a1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmaxa_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmaxa.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmaxa_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmaxa.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll -new file mode 100644 -index 000000000000..0f844240277f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmin_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll -new file mode 100644 -index 000000000000..27f70b5fba32 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmina_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmina.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmina_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmina.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll -new file mode 100644 -index 000000000000..856ca9cadbd9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmsub.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmsub.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll -new file mode 100644 -index 000000000000..1e6c4c77d536 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmul_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll -new file mode 100644 -index 000000000000..e1a9ea78ef9d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmadd.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfnmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmadd.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll -new file mode 100644 -index 000000000000..46db0f4a5061 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { -+; CHECK-LABEL: lsx_vfnmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll -new file mode 100644 -index 000000000000..669fde5912d4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecip_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecip.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrecip_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrecip.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll -new file mode 100644 -index 000000000000..8d872fc72962 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrz_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrp_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrp_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrm_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrintrm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va) -+ ret <2 x double> %res -+} -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrint_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrint.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrint_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrint.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll -new file mode 100644 -index 000000000000..326d87308b0b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>) -+ -+define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>) -+ -+define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfrsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll -new file mode 100644 -index 000000000000..5c072b194d4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vfrstp_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstp.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vfrstp_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstp.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vfrstpi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstpi.b $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vfrstpi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfrstpi.h $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll -new file mode 100644 -index 000000000000..55bffba9e99e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>) -+ -+define <4 x float> @lsx_vfsqrt_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vfsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsqrt.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>) -+ -+define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vfsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsqrt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll -new file mode 100644 -index 000000000000..2beba4a70dc9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>) -+ -+define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind { -+; CHECK-LABEL: lsx_vfsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb) -+ ret <4 x float> %res -+} -+ -+declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>) -+ -+define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vfsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb) -+ ret <2 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll -new file mode 100644 -index 000000000000..2a494cd7fa87 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll -@@ -0,0 +1,350 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrne_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrne_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrp_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrp_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrm_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrm_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.w.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.l.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrz_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>) -+ -+define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.wu.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>) -+ -+define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind { -+; CHECK-LABEL: lsx_vftint_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.lu.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va) -+ ret <2 x i64> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrne_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrne.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrz_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrz.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrp_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrp.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftintrm_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrm.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>) -+ -+define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind { -+; CHECK-LABEL: lsx_vftint_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftint.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrnel_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrnel.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrneh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrneh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrzl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrzl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrzh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrzh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrpl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrpl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrph_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrph.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrml_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrml.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintrmh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintrmh.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftintl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftintl.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>) -+ -+define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind { -+; CHECK-LABEL: lsx_vftinth_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vftinth.l.s $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll -new file mode 100644 -index 000000000000..05725582334a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhaddw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhaddw.qu.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll -new file mode 100644 -index 000000000000..dd5815b2ea85 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vhsubw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vhsubw.qu.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll -new file mode 100644 -index 000000000000..77b0b3484df8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvl.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vilvh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vilvh.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll -new file mode 100644 -index 000000000000..61d2cbd28066 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll -@@ -0,0 +1,54 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vinsgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll -new file mode 100644 -index 000000000000..b9e2ff8088d8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld(i8* %p) nounwind { -+; CHECK-LABEL: lsx_vld: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64) -+ -+define <16 x i8> @lsx_vldx(i8* %p, i64 %b) nounwind { -+; CHECK-LABEL: lsx_vldx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldx $vr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll -new file mode 100644 -index 000000000000..ace910b54d9a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi() nounwind { -+; CHECK-LABEL: lsx_vldi: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldi $vr0, 4095 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b() nounwind { -+; CHECK-LABEL: lsx_vrepli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.b $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h() nounwind { -+; CHECK-LABEL: lsx_vrepli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.h $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w() nounwind { -+; CHECK-LABEL: lsx_vrepli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.w $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d() nounwind { -+; CHECK-LABEL: lsx_vrepli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.d $vr0, 511 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll -new file mode 100644 -index 000000000000..1a9cf3d3a766 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.b $vr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.h $vr0, $a0, 2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.w $vr0, $a0, 4 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vldrepl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vldrepl.d $vr0, $a0, 8 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll -new file mode 100644 -index 000000000000..89503724fd73 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll -new file mode 100644 -index 000000000000..1e3ab25a5fcf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll -new file mode 100644 -index 000000000000..4dd289cf6ed7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmax_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmaxi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll -new file mode 100644 -index 000000000000..aa12a5ead6a3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmin_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmini_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmini.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll -new file mode 100644 -index 000000000000..6b3dc6865584 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmod_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll -new file mode 100644 -index 000000000000..3ecd777aee67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmskgez_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskgez.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll -new file mode 100644 -index 000000000000..be00c76137c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vmskltz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmskltz.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll -new file mode 100644 -index 000000000000..02f1752f7190 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vmsknz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsknz.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll -new file mode 100644 -index 000000000000..98684e10c78e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll -new file mode 100644 -index 000000000000..a4deb8f8f823 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmuh_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll -new file mode 100644 -index 000000000000..aca60d1663b7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll -new file mode 100644 -index 000000000000..eb55c1f809e3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vmulwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll -new file mode 100644 -index 000000000000..43c6e9757614 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vneg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vneg.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll -new file mode 100644 -index 000000000000..16619225f2d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vnor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll -new file mode 100644 -index 000000000000..c2388a1e0da3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vnori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vnori.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll -new file mode 100644 -index 000000000000..ab557003d150 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll -new file mode 100644 -index 000000000000..85c0f432c54a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vori.b $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll -new file mode 100644 -index 000000000000..4528628e02c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vorn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll -new file mode 100644 -index 000000000000..70a3620d1757 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpackod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll -new file mode 100644 -index 000000000000..431b270ab0a1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) -+ -+define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.b $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) -+ -+define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.h $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) -+ -+define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.w $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) -+ -+define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpcnt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpcnt.d $vr0, $vr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll -new file mode 100644 -index 000000000000..b8367d98caf6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpermi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll -new file mode 100644 -index 000000000000..4ebf29e1409c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vpickod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll -new file mode 100644 -index 000000000000..ed56d30ce3c4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vpickve2gr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll -new file mode 100644 -index 000000000000..091f1c98c228 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) -+ -+define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32) -+ -+define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) -+ -+define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) -+ -+define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { -+; CHECK-LABEL: lsx_vreplgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll -new file mode 100644 -index 000000000000..3ba184dad052 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK-LABEL: lsx_vreplve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll -new file mode 100644 -index 000000000000..9b8af1878cb8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vreplvei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll -new file mode 100644 -index 000000000000..df8650677147 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vrotr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vrotri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll -new file mode 100644 -index 000000000000..a54f955766df ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsadd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll -new file mode 100644 -index 000000000000..4286842a63b9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsat_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsat.du $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll -new file mode 100644 -index 000000000000..3cb4acd82439 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vseq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vseqi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseqi.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll -new file mode 100644 -index 000000000000..3188fb4e2c2e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>) -+ -+define i32 @lsx_bz_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vseteqz.v $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>) -+ -+define i32 @lsx_bnz_v(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetnez.v $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll -new file mode 100644 -index 000000000000..22e01922e87b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>) -+ -+define i32 @lsx_bnz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.b $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>) -+ -+define i32 @lsx_bnz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.h $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.w(<4 x i32>) -+ -+define i32 @lsx_bnz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.w $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>) -+ -+define i32 @lsx_bnz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_bnz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetallnez.d $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll -new file mode 100644 -index 000000000000..96c79c10e468 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>) -+ -+define i32 @lsx_bz_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_bz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.b $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>) -+ -+define i32 @lsx_bz_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_bz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.h $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>) -+ -+define i32 @lsx_bz_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_bz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.w $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>) -+ -+define i32 @lsx_bz_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_bz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsetanyeqz.d $fcc0, $vr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll -new file mode 100644 -index 000000000000..f5d516521e45 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { -+; CHECK-LABEL: lsx_vshuf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf.d $vr0, $vr1, $vr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll -new file mode 100644 -index 000000000000..1ad5f2af5591 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 255) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vshuf4i_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 255 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll -new file mode 100644 -index 000000000000..3997b0cc995c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsigncov_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsigncov.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll -new file mode 100644 -index 000000000000..5a9d5f06e63f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsle_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsle.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslei_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslei.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll -new file mode 100644 -index 000000000000..7bc20af41f17 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsll_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslli.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll -new file mode 100644 -index 000000000000..29ab70da1ced ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsllwil_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll -new file mode 100644 -index 000000000000..18683e9dc46f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.b $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.w $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.d $vr0, $vr0, -16 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vslt_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslt.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.bu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.hu $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vslti_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vslti.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll -new file mode 100644 -index 000000000000..e85c8464c18e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsra_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrai_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll -new file mode 100644 -index 000000000000..4ffe5a704c2c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll -new file mode 100644 -index 000000000000..717c641616c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll -new file mode 100644 -index 000000000000..8b52b7ac9631 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrar_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrari_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll -new file mode 100644 -index 000000000000..d4cdfb5359ea ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll -new file mode 100644 -index 000000000000..2253e88372fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll -new file mode 100644 -index 000000000000..1cddd9622233 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll -new file mode 100644 -index 000000000000..1c9b23243ffb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll -new file mode 100644 -index 000000000000..6e523efa1824 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll -new file mode 100644 -index 000000000000..51638fa1a47f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsrlri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll -new file mode 100644 -index 000000000000..893e51396241 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll -new file mode 100644 -index 000000000000..d1ea450d2237 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll -new file mode 100644 -index 000000000000..cecccbb730c9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssran_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll -new file mode 100644 -index 000000000000..57b8eb169866 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrani_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll -new file mode 100644 -index 000000000000..c6b7d9ec8e1d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll -new file mode 100644 -index 000000000000..1a2e91962ac3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrarni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll -new file mode 100644 -index 000000000000..697ccc3962a8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrln_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrln_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll -new file mode 100644 -index 000000000000..8dd41e7abe87 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll -new file mode 100644 -index 000000000000..a8e76cbaa7fd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>) -+ -+define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>) -+ -+define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>) -+ -+define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll -new file mode 100644 -index 000000000000..869e81b2b09d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssrlrni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll -new file mode 100644 -index 000000000000..c594b426d650 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vssub_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vssub_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll -new file mode 100644 -index 000000000000..798f509f2318 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vst: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vst $vr0, $a0, -2048 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64) -+ -+define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind { -+; CHECK-LABEL: lsx_vstx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstx $vr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll -new file mode 100644 -index 000000000000..6b9e7a9d7462 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.b $vr0, $a0, 1, 15 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 15) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind { -+; CHECK-LABEL: lsx_vstelm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll -new file mode 100644 -index 000000000000..5c04a3d8de0d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>) -+ -+define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>) -+ -+define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsub_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll -new file mode 100644 -index 000000000000..304a4e4a78cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind { -+; CHECK-LABEL: lsx_vsubi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll -new file mode 100644 -index 000000000000..48100db74334 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.d.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8>, <16 x i8>) -+ -+define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) -+ -+define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) -+ -+define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) -+ ret <2 x i64> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) -+ -+define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK-LABEL: lsx_vsubwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll -new file mode 100644 -index 000000000000..72a1fe93c2c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) -+ -+define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK-LABEL: lsx_vxor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll -new file mode 100644 -index 000000000000..09669cd5ac14 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { -+; CHECK-LABEL: lsx_vxori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vxori.b $vr0, $vr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) -+ ret <16 x i8> %res -+} --- -2.20.1 - - -From fd469d4a3c3b439f40accda691597502bc444a99 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:12:27 +0800 -Subject: [PATCH 05/35] [LoongArch] Add LASX intrinsic testcases - -Depends on D155830 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D155835 - -(cherry picked from commit 83311b2b5d1b9869f9a7b265994394ea898448a2) ---- - .../CodeGen/LoongArch/lasx/intrinsic-absd.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-add.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-adda.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-addi.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-addw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-and.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-andi.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-andn.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-avg.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-avgr.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitclr.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitrev.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-bitsel.ll | 14 + - .../LoongArch/lasx/intrinsic-bitseli.ll | 14 + - .../LoongArch/lasx/intrinsic-bitset.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-bsll.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-bsrl.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-clo.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-clz.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-div.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ext2xv.ll | 146 +++++ - .../CodeGen/LoongArch/lasx/intrinsic-exth.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-extl.ll | 26 + - .../LoongArch/lasx/intrinsic-extrins.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-fadd.ll | 26 + - .../LoongArch/lasx/intrinsic-fclass.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-fcvt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcvth.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fdiv.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ffint.ll | 86 +++ - .../CodeGen/LoongArch/lasx/intrinsic-flogb.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmadd.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmax.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmin.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmina.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmsub.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fmul.ll | 26 + - .../LoongArch/lasx/intrinsic-fnmadd.ll | 26 + - .../LoongArch/lasx/intrinsic-fnmsub.ll | 26 + - .../LoongArch/lasx/intrinsic-frecip.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-frint.ll | 122 ++++ - .../LoongArch/lasx/intrinsic-frsqrt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-frstp.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-fsub.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ftint.ll | 350 ++++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-haddw.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-hsubw.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ilv.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-insgr2vr.ll | 28 + - .../LoongArch/lasx/intrinsic-insve0.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ld.ll | 26 + - .../CodeGen/LoongArch/lasx/intrinsic-ldi.ll | 62 ++ - .../LoongArch/lasx/intrinsic-ldrepl.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-madd.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-maddw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-max.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-min.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-mod.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-mskgez.ll | 14 + - .../LoongArch/lasx/intrinsic-mskltz.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-msknz.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-msub.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-muh.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-mul.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-mulw.ll | 290 ++++++++++ - .../CodeGen/LoongArch/lasx/intrinsic-neg.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-nor.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-nori.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-or.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-ori.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-orn.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-pack.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-pcnt.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-perm.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-permi.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-pick.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-pickve.ll | 50 ++ - .../LoongArch/lasx/intrinsic-pickve2gr.ll | 53 ++ - .../LoongArch/lasx/intrinsic-repl128vei.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replgr2vr.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replve.ll | 50 ++ - .../LoongArch/lasx/intrinsic-replve0.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-rotr.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sadd.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sat.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-seq.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-set.ll | 38 ++ - .../LoongArch/lasx/intrinsic-setallnez.ll | 74 +++ - .../LoongArch/lasx/intrinsic-setanyeqz.ll | 74 +++ - .../CodeGen/LoongArch/lasx/intrinsic-shuf.ll | 50 ++ - .../LoongArch/lasx/intrinsic-shuf4i.ll | 50 ++ - .../LoongArch/lasx/intrinsic-signcov.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-sle.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-sll.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-sllwil.ll | 74 +++ - .../CodeGen/LoongArch/lasx/intrinsic-slt.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-sra.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-sran.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srani.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srar.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srarn.ll | 38 ++ - .../LoongArch/lasx/intrinsic-srarni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srl.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srln.ll | 38 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srlni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-srlr.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-srlrn.ll | 38 ++ - .../LoongArch/lasx/intrinsic-srlrni.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-ssran.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrani.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ssrarn.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrarni.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ssrln.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrlni.ll | 98 ++++ - .../LoongArch/lasx/intrinsic-ssrlrn.ll | 74 +++ - .../LoongArch/lasx/intrinsic-ssrlrni.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-ssub.ll | 98 ++++ - .../CodeGen/LoongArch/lasx/intrinsic-st.ll | 27 + - .../CodeGen/LoongArch/lasx/intrinsic-stelm.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-sub.ll | 62 ++ - .../CodeGen/LoongArch/lasx/intrinsic-subi.ll | 50 ++ - .../CodeGen/LoongArch/lasx/intrinsic-subw.ll | 194 +++++++ - .../CodeGen/LoongArch/lasx/intrinsic-xor.ll | 14 + - .../CodeGen/LoongArch/lasx/intrinsic-xori.ll | 14 + - 128 files changed, 9154 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll -new file mode 100644 -index 000000000000..bf54f44357b0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvabsd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll -new file mode 100644 -index 000000000000..0c2f2ace29fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadd_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll -new file mode 100644 -index 000000000000..c1258d53e913 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvadda_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll -new file mode 100644 -index 000000000000..09b5d07a0151 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvaddi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll -new file mode 100644 -index 000000000000..ef7a1b5a50ef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll -new file mode 100644 -index 000000000000..15f3a8094770 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvand_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll -new file mode 100644 -index 000000000000..88cf142d6968 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvandi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll -new file mode 100644 -index 000000000000..f385ef3661cb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvandn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll -new file mode 100644 -index 000000000000..488d3b96b003 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavg_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll -new file mode 100644 -index 000000000000..b5ab5a5366aa ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvavgr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll -new file mode 100644 -index 000000000000..cec71bab2fe8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitclr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitclri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll -new file mode 100644 -index 000000000000..fb4f9fbc2e4b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitrev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitrevi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll -new file mode 100644 -index 000000000000..2e91407590ac ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvbitsel_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll -new file mode 100644 -index 000000000000..79dd55cbfef9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitseli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll -new file mode 100644 -index 000000000000..83d1f0ef60c6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvbitset_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvbitseti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll -new file mode 100644 -index 000000000000..cbb63ced5cc0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbsll_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll -new file mode 100644 -index 000000000000..b0c26cbe3e35 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvbsrl_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll -new file mode 100644 -index 000000000000..29b2be03d54e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvclo_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclo.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll -new file mode 100644 -index 000000000000..5247ceedbd14 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvclz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvclz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvclz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll -new file mode 100644 -index 000000000000..813204092e94 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvdiv_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll -new file mode 100644 -index 000000000000..48721b52af00 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll -@@ -0,0 +1,146 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) -+ -+define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) -+ -+define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_w_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) -+ -+define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) -+ -+define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) -+ -+define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) -+ -+define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) -+ -+define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) -+ -+define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_wu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) -+ -+define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) -+ -+define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) -+ -+define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) -+ -+define <4 x i64> @lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_vext2xv_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll -new file mode 100644 -index 000000000000..543589e61b12 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) -+ -+define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.h.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) -+ -+define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.w.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) -+ -+define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.q.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) -+ -+define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) -+ -+define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) -+ -+define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.du.wu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) -+ -+define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvexth_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll -new file mode 100644 -index 000000000000..7040c8c784cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvextl_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextl.q.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) -+ -+define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvextl_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll -new file mode 100644 -index 000000000000..c8774a7b29c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvextrins_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll -new file mode 100644 -index 000000000000..563a0ce9e384 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll -new file mode 100644 -index 000000000000..901ca5bb0260 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfclass_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfclass.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfclass_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfclass.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll -new file mode 100644 -index 000000000000..b01f908e71af ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll -@@ -0,0 +1,530 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_caf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_caf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_ceq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_ceq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_clt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_clt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_cune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_saf_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_saf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sun_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sun_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_seq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_seq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sueq_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sueq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_slt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_slt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sult_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sult_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sle_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sule_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sule_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sne.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sor_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sor_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) -+ -+define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sune_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) -+ -+define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcmp_sune_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll -new file mode 100644 -index 000000000000..82bf1d3df72c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) -+ -+define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcvt_h_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) -+ -+define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfcvt_s_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll -new file mode 100644 -index 000000000000..e1a6a2923e67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) -+ -+define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvth_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) -+ -+define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvth_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll -new file mode 100644 -index 000000000000..0b3e693c7f51 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) -+ -+define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvtl_s_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) -+ -+define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfcvtl_d_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll -new file mode 100644 -index 000000000000..49923ddd4e8d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfdiv_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfdiv_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll -new file mode 100644 -index 000000000000..24da0bd33838 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) -+ -+define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) -+ -+define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_d_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) -+ -+define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) -+ -+define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvffint_d_lu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) -+ ret <4 x double> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) -+ -+define <4 x double> @lasx_xvffintl_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffintl_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) -+ ret <4 x double> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) -+ -+define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvffinth_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffinth.d.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) -+ -+define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvffint_s_l: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x float> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll -new file mode 100644 -index 000000000000..bccef4504d70 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) -+ -+define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvflogb_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvflogb.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) -+ -+define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvflogb_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvflogb.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll -new file mode 100644 -index 000000000000..0fc06f971660 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll -new file mode 100644 -index 000000000000..2422fa0c00d8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmax_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll -new file mode 100644 -index 000000000000..cd9ccc656aef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmaxa_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmaxa_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmaxa.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll -new file mode 100644 -index 000000000000..effb3f9e1d75 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmin_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll -new file mode 100644 -index 000000000000..753a6f31ba06 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmina_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmina_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmina.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll -new file mode 100644 -index 000000000000..57909d0dd168 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll -new file mode 100644 -index 000000000000..9cad6f383066 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmul_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll -new file mode 100644 -index 000000000000..c30993590f98 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmadd_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll -new file mode 100644 -index 000000000000..2e7ca695be62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfnmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll -new file mode 100644 -index 000000000000..da3a26df2824 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecip_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecip.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrecip_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrecip.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll -new file mode 100644 -index 000000000000..ddead27cd14b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrne_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrne_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrz_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrp_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrp_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrm_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrintrm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va) -+ ret <4 x double> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrint_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrint.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrint_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrint.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll -new file mode 100644 -index 000000000000..6efa8122baf1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfrsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll -new file mode 100644 -index 000000000000..e83e55a52a11 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfrstp_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstp.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvfrstp_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfrstpi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfrstpi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll -new file mode 100644 -index 000000000000..a13333d8d81c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) -+ -+define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvfsqrt_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>) -+ -+define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvfsqrt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll -new file mode 100644 -index 000000000000..b52774a03618 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) -+ -+define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfsub_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>) -+ -+define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvfsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll -new file mode 100644 -index 000000000000..74cd507f16d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll -@@ -0,0 +1,350 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrne_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrm_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_w_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.w.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_l_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.l.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>) -+ -+define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_wu_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.wu.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>) -+ -+define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvftint_lu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.lu.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va) -+ ret <4 x i64> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrne_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrz_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrp_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrp_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftintrm_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>) -+ -+define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind { -+; CHECK-LABEL: lasx_xvftint_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrnel_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrneh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrzl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrzh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrpl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrph_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrml_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintrmh_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftintl_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftintl.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>) -+ -+define <4 x i64> @lasx_xvftinth_l_s(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvftinth_l_s: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvftinth.l.s $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll -new file mode 100644 -index 000000000000..2c64ab23806b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhaddw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll -new file mode 100644 -index 000000000000..a5223c1d89a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvhsubw_qu_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll -new file mode 100644 -index 000000000000..c9d0ca6b0324 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvilvh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll -new file mode 100644 -index 000000000000..ea98c96464ae ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvinsgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvinsgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll -new file mode 100644 -index 000000000000..27ae819c4144 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvinsve0_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvinsve0_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll -new file mode 100644 -index 000000000000..5ffc629db466 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll -@@ -0,0 +1,26 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvld: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64) -+ -+define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind { -+; CHECK-LABEL: lasx_xvldx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldx $xr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll -new file mode 100644 -index 000000000000..59f79dd32af3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi() nounwind { -+; CHECK-LABEL: lasx_xvldi: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldi $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b() nounwind { -+; CHECK-LABEL: lasx_xvrepli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.b $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h() nounwind { -+; CHECK-LABEL: lasx_xvrepli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.h $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w() nounwind { -+; CHECK-LABEL: lasx_xvrepli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.w $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d() nounwind { -+; CHECK-LABEL: lasx_xvrepli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.d $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll -new file mode 100644 -index 000000000000..ae6abdf81cbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvldrepl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll -new file mode 100644 -index 000000000000..d3b09396727e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll -new file mode 100644 -index 000000000000..146624a764a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmaddwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll -new file mode 100644 -index 000000000000..9cf09df4439a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_vmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_vmax_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmax_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmaxi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll -new file mode 100644 -index 000000000000..c94b1e4ea44c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmin_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmini_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll -new file mode 100644 -index 000000000000..a177246bb235 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmod_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll -new file mode 100644 -index 000000000000..da87c20ad6ee ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskgez_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskgez.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll -new file mode 100644 -index 000000000000..b2218487535c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvmskltz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmskltz.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll -new file mode 100644 -index 000000000000..becd2c883a7e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvmsknz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsknz.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll -new file mode 100644 -index 000000000000..c89f9578b77d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvmsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll -new file mode 100644 -index 000000000000..97461512ce16 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmuh_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll -new file mode 100644 -index 000000000000..d5d852e58a9f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmul_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll -new file mode 100644 -index 000000000000..f69e64aa7698 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll -@@ -0,0 +1,290 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwev_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_h_bu_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_w_hu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_d_wu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvmulwod_q_du_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll -new file mode 100644 -index 000000000000..ecbedf334657 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvneg_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvneg.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll -new file mode 100644 -index 000000000000..674746b7624e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvnor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll -new file mode 100644 -index 000000000000..55eebf87ee92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvnori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll -new file mode 100644 -index 000000000000..16462cfafc54 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll -new file mode 100644 -index 000000000000..8e53d88bac37 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvori.b $xr0, $xr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll -new file mode 100644 -index 000000000000..3a335cdd3716 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvorn_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll -new file mode 100644 -index 000000000000..512b30234917 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpackod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpackod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll -new file mode 100644 -index 000000000000..d77f1d2082c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpcnt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpcnt.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll -new file mode 100644 -index 000000000000..4ec434edd4ec ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvperm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll -new file mode 100644 -index 000000000000..0d9f9daabc44 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpermi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpermi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpermi_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll -new file mode 100644 -index 000000000000..bbd6d693ca0b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickev_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvpickod_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll -new file mode 100644 -index 000000000000..546777bc72ab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_w_f: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve_d_f: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll -new file mode 100644 -index 000000000000..0617e7424321 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll -@@ -0,0 +1,53 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+ -+ -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvpickve2gr_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll -new file mode 100644 -index 000000000000..25fab44f461f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvrepl128vei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll -new file mode 100644 -index 000000000000..c71abd2205c6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32) -+ -+define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32) -+ -+define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32) -+ -+define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64) -+ -+define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind { -+; CHECK-LABEL: lasx_xvreplgr2vr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll -new file mode 100644 -index 000000000000..21d36ff7bb5e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK-LABEL: lasx_xvreplve_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll -new file mode 100644 -index 000000000000..7996bb36ef03 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) -+ -+define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.b $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>) -+ -+define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.h $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>) -+ -+define <8 x i32> @lasx_xvreplve0_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.w $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>) -+ -+define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.d $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>) -+ -+define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvreplve0_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplve0.q $xr0, $xr0 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll -new file mode 100644 -index 000000000000..64d2773864e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvrotr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvrotr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvrotri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrotri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll -new file mode 100644 -index 000000000000..54a5e2e9c833 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsadd_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll -new file mode 100644 -index 000000000000..293b9dc9eb4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsat_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsat.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll -new file mode 100644 -index 000000000000..83bc93c88c73 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvseq_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvseqi_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll -new file mode 100644 -index 000000000000..6e3e2e0330f5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>) -+ -+define i32 @lasx_xbz_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvseteqz.v $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>) -+ -+define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetnez.v $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll -new file mode 100644 -index 000000000000..a466b78bf8d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>) -+ -+define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.b $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>) -+ -+define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.h $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>) -+ -+define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.w $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>) -+ -+define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xbnz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetallnez.d $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll -new file mode 100644 -index 000000000000..36e65fc5b328 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>) -+ -+define i32 @lasx_xbz_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.b $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB0_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>) -+ -+define i32 @lasx_xbz_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.h $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB1_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>) -+ -+define i32 @lasx_xbz_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.w $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB2_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>) -+ -+define i32 @lasx_xbz_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xbz_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsetanyeqz.d $fcc0, $xr0 -+; CHECK-NEXT: bcnez $fcc0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 0 -+; CHECK-NEXT: ret -+; CHECK-NEXT: .LBB3_2: # %entry -+; CHECK-NEXT: addi.w $a0, $zero, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> %va) -+ ret i32 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll -new file mode 100644 -index 000000000000..9b9140f6ad62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { -+; CHECK-LABEL: lasx_xvshuf_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll -new file mode 100644 -index 000000000000..31205086759c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvshuf4i_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll -new file mode 100644 -index 000000000000..e6c6d8ccd0d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsigncov_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsigncov.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll -new file mode 100644 -index 000000000000..8895efc84b84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsle_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsle_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslei_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslei.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll -new file mode 100644 -index 000000000000..14110b613dbe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsll_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll -new file mode 100644 -index 000000000000..a72b8a6cbb4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.h.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.w.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.d.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_hu_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.hu.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_wu_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.wu.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsllwil_du_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsllwil.du.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll -new file mode 100644 -index 000000000000..3ea87adff110 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvslt_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslt.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvslti_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvslti.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll -new file mode 100644 -index 000000000000..a7498682559b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsra_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrai_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll -new file mode 100644 -index 000000000000..f59ae4c19662 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsran.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll -new file mode 100644 -index 000000000000..91fb90da9c52 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrani.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll -new file mode 100644 -index 000000000000..e2c160557c4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrar_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrar.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrari_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrari.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll -new file mode 100644 -index 000000000000..02dd989773ca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll -new file mode 100644 -index 000000000000..a7d2c3739793 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrarni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll -new file mode 100644 -index 000000000000..7b2992f2ca3b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrl_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrli_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll -new file mode 100644 -index 000000000000..dc5c0e016ea0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrln.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll -new file mode 100644 -index 000000000000..0301ebb195e2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll -new file mode 100644 -index 000000000000..e04504158e27 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlr_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsrlri_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll -new file mode 100644 -index 000000000000..1e7df379c6e1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll -@@ -0,0 +1,38 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll -new file mode 100644 -index 000000000000..56dbafe8b1ac ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll -new file mode 100644 -index 000000000000..da1857dad145 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssran_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll -new file mode 100644 -index 000000000000..9efa659b4a1e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrani_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll -new file mode 100644 -index 000000000000..b5d59ff06f4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll -new file mode 100644 -index 000000000000..da411dad645b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrarni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll -new file mode 100644 -index 000000000000..c60b5bdf81a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrln_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll -new file mode 100644 -index 000000000000..e57dd426bde8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll -new file mode 100644 -index 000000000000..774cf1bd5e84 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll -@@ -0,0 +1,74 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>) -+ -+define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>) -+ -+define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>) -+ -+define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrn_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <8 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll -new file mode 100644 -index 000000000000..9a80516d8d78 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_b_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_h_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_w_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_d_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_bu_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_hu_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_wu_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssrlrni_du_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll -new file mode 100644 -index 000000000000..cd3ccd9f5262 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll -@@ -0,0 +1,98 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvssub_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll -new file mode 100644 -index 000000000000..b69e7b813f0c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll -@@ -0,0 +1,27 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvst: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvst $xr0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64) -+ -+define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstx: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a1, $zero, 1 -+; CHECK-NEXT: xvstx $xr0, $a0, $a1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll -new file mode 100644 -index 000000000000..52ef3c471412 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind { -+; CHECK-LABEL: lasx_xvstelm_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1 -+; CHECK-NEXT: ret -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll -new file mode 100644 -index 000000000000..4d69dd83dcde ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll -@@ -0,0 +1,62 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>) -+ -+define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>) -+ -+define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsub_q: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll -new file mode 100644 -index 000000000000..cc3235ff4657 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll -@@ -0,0 +1,50 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind { -+; CHECK-LABEL: lasx_xvsubi_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll -new file mode 100644 -index 000000000000..6f203e894990 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll -@@ -0,0 +1,194 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwev_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_h_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_w_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_d_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_q_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) -+ -+define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_h_bu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) -+ -+define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_w_hu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) -+ -+define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_d_wu: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) -+ ret <4 x i64> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) -+ -+define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK-LABEL: lasx_xvsubwod_q_du: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll -new file mode 100644 -index 000000000000..6395b3d6f2e7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) -+ -+define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK-LABEL: lasx_xvxor_v: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll -new file mode 100644 -index 000000000000..c71d7e731165 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll -@@ -0,0 +1,14 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { -+; CHECK-LABEL: lasx_xvxori_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 -+; CHECK-NEXT: ret -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) -+ ret <32 x i8> %res -+} --- -2.20.1 - - -From 45434adc9e68b15a6fc26f55659416ca2ef28ee3 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:14:12 +0800 -Subject: [PATCH 06/35] [LoongArch] Add testcases of LASX intrinsics with - immediates - -The testcases mainly cover three situations: -- the arguments which should be immediates are non immediates. -- the immediate is out of upper limit of the argument type. -- the immediate is out of lower limit of the argument type. - -Depends on D155830 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157571 - -(cherry picked from commit 82bbf7003cabe2b6be8ab9b88bc96ecb8a64dc49) ---- - .../lasx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-addi-non-imm.ll | 37 +++++ - .../lasx/intrinsic-andi-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-andi-non-imm.ll | 10 ++ - .../lasx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitclr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitrev-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bitseli-invalid-imm.ll | 17 +++ - .../lasx/intrinsic-bitseli-non-imm.ll | 10 ++ - .../lasx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-bitset-non-imm.ll | 37 +++++ - .../lasx/intrinsic-bsll-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-bsll-non-imm.ll | 10 ++ - .../lasx/intrinsic-bsrl-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-bsrl-non-imm.ll | 10 ++ - .../lasx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-extrins-non-imm.ll | 37 +++++ - .../lasx/intrinsic-frstp-invalid-imm.ll | 33 +++++ - .../LoongArch/lasx/intrinsic-frstp-non-imm.ll | 19 +++ - .../lasx/intrinsic-insgr2vr-invalid-imm.ll | 33 +++++ - .../lasx/intrinsic-insgr2vr-non-imm.ll | 19 +++ - .../lasx/intrinsic-insve0-invalid-imm.ll | 33 +++++ - .../lasx/intrinsic-insve0-non-imm.ll | 19 +++ - .../lasx/intrinsic-ld-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-ld-non-imm.ll | 10 ++ - .../lasx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ - .../LoongArch/lasx/intrinsic-ldi-non-imm.ll | 46 +++++++ - .../lasx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-ldrepl-non-imm.ll | 37 +++++ - .../lasx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-max-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-min-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-nori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-nori-non-imm.ll | 10 ++ - .../lasx/intrinsic-ori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-ori-non-imm.ll | 10 ++ - .../lasx/intrinsic-permi-invalid-imm.ll | 49 +++++++ - .../LoongArch/lasx/intrinsic-permi-non-imm.ll | 28 ++++ - .../lasx/intrinsic-pickve-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-pickve-non-imm.ll | 37 +++++ - .../lasx/intrinsic-pickve2gr-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-pickve2gr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-repl128vei-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-repl128vei-non-imm.ll | 37 +++++ - .../lasx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-rotr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-sat-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-seq-non-imm.ll | 37 +++++ - .../lasx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-shuf4i-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-sle-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-sll-non-imm.ll | 37 +++++ - .../lasx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ - .../lasx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ - .../lasx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lasx/intrinsic-slt-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-sra-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srani-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srar-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-srarni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srl-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srlni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-srlr-non-imm.ll | 37 +++++ - .../lasx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ - .../lasx/intrinsic-srlrni-non-imm.ll | 37 +++++ - .../lasx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lasx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ - .../lasx/intrinsic-st-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-st-non-imm.ll | 10 ++ - .../lasx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ - .../LoongArch/lasx/intrinsic-stelm-non-imm.ll | 65 +++++++++ - .../lasx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lasx/intrinsic-subi-non-imm.ll | 37 +++++ - .../lasx/intrinsic-xori-invalid-imm.ll | 17 +++ - .../LoongArch/lasx/intrinsic-xori-non-imm.ll | 10 ++ - 94 files changed, 5003 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll -new file mode 100644 -index 000000000000..4998847f0910 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll -new file mode 100644 -index 000000000000..f25f0e61a28e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll -new file mode 100644 -index 000000000000..60f0b765f954 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll -new file mode 100644 -index 000000000000..1273dc6b450b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll -new file mode 100644 -index 000000000000..ecc287e89bbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll -new file mode 100644 -index 000000000000..09da85411082 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll -new file mode 100644 -index 000000000000..dff0884fdd5a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll -new file mode 100644 -index 000000000000..e1aef1a82f0c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll -new file mode 100644 -index 000000000000..3f6fd44f842c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll -new file mode 100644 -index 000000000000..40533ab96d86 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll -new file mode 100644 -index 000000000000..17a77ece7775 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll -new file mode 100644 -index 000000000000..613285804e0e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll -new file mode 100644 -index 000000000000..1da08a633bd2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll -new file mode 100644 -index 000000000000..e19a3232c179 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll -new file mode 100644 -index 000000000000..5d2b63391e67 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll -new file mode 100644 -index 000000000000..8dfd0ca579b8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll -new file mode 100644 -index 000000000000..1301b8a146eb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll -new file mode 100644 -index 000000000000..bca8f8b3c778 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll -new file mode 100644 -index 000000000000..64b4632669d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll -new file mode 100644 -index 000000000000..ca92cff9b2d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll -new file mode 100644 -index 000000000000..4982f2c7d43a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll -new file mode 100644 -index 000000000000..3accabf6dbd9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) -+ -+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) -+ -+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll -new file mode 100644 -index 000000000000..a54fa8515fba ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll -new file mode 100644 -index 000000000000..53e59db11aa6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll -new file mode 100644 -index 000000000000..20dd8a45d7f0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvld: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvld: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll -new file mode 100644 -index 000000000000..b23436a44832 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) -+ -+define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll -new file mode 100644 -index 000000000000..f3dd3650cf8a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll -@@ -0,0 +1,81 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvldi_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrepli_b_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrepli_h_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrepli_w_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d_lo() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrepli_d_hi() nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll -new file mode 100644 -index 000000000000..6466818bf674 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll -@@ -0,0 +1,46 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) -+ -+define <4 x i64> @lasx_xvldi(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) -+ -+define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) -+ -+define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) -+ -+define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) -+ -+define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll -new file mode 100644 -index 000000000000..cb62a839985a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 -2049) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll -new file mode 100644 -index 000000000000..075d663b0dd7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) -+ -+define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) -+ -+define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) -+ -+define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) -+ -+define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll -new file mode 100644 -index 000000000000..a671e9979b2f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll -new file mode 100644 -index 000000000000..b85798b53c92 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll -new file mode 100644 -index 000000000000..5ed4104c295f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmini_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll -new file mode 100644 -index 000000000000..b81931977aad ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll -new file mode 100644 -index 000000000000..1130e094bf1f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll -new file mode 100644 -index 000000000000..8f2333064d64 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll -new file mode 100644 -index 000000000000..90dec8e55f2d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll -new file mode 100644 -index 000000000000..ae6571d98f4a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll -new file mode 100644 -index 000000000000..41f4856bd8f7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll -@@ -0,0 +1,49 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll -new file mode 100644 -index 000000000000..afb335c5d6ca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll -@@ -0,0 +1,28 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll -new file mode 100644 -index 000000000000..cfc6ec42874e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) -+ ret <8 x float> %res -+} -+ -+define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) -+ ret <4 x double> %res -+} -+ -+define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll -new file mode 100644 -index 000000000000..be1f19a89737 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) -+ -+define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) -+ ret <8 x float> %res -+} -+ -+declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) -+ -+define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) -+ ret <4 x double> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll -new file mode 100644 -index 000000000000..93056b272dfc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll -new file mode 100644 -index 000000000000..0fa8c94adc60 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) -+ -+define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) -+ -+define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll -new file mode 100644 -index 000000000000..a0cb309c54e1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll -new file mode 100644 -index 000000000000..c537ffa66ba7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll -new file mode 100644 -index 000000000000..40abdf497605 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll -new file mode 100644 -index 000000000000..dd38301d0534 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll -new file mode 100644 -index 000000000000..839fbc9990d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll -new file mode 100644 -index 000000000000..b73b32ebd3b0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll -new file mode 100644 -index 000000000000..bb6ef0cc6574 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll -new file mode 100644 -index 000000000000..fb2c6206da7b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll -new file mode 100644 -index 000000000000..9217d1f6a05d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll -new file mode 100644 -index 000000000000..8d6d1c694193 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll -new file mode 100644 -index 000000000000..5b10aca9801d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll -new file mode 100644 -index 000000000000..903bc10d88b7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll -new file mode 100644 -index 000000000000..bf8205376a6c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll -new file mode 100644 -index 000000000000..b5368a86b5c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll -new file mode 100644 -index 000000000000..18803767d6c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll -@@ -0,0 +1,97 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll -new file mode 100644 -index 000000000000..3f5d4d631671 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll -@@ -0,0 +1,55 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) -+ -+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) -+ -+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) -+ -+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll -new file mode 100644 -index 000000000000..dc0567da4e47 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll -new file mode 100644 -index 000000000000..a2cedc8d3ef3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll -new file mode 100644 -index 000000000000..15b522d5e7e3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll -new file mode 100644 -index 000000000000..fefee7246ae6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll -new file mode 100644 -index 000000000000..bedbfc4889d2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll -new file mode 100644 -index 000000000000..3c17f2b6090a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll -new file mode 100644 -index 000000000000..e417e3cc5bbf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll -new file mode 100644 -index 000000000000..15fed7966f1c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll -new file mode 100644 -index 000000000000..83e977827e2d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll -new file mode 100644 -index 000000000000..eb577a29fb33 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll -new file mode 100644 -index 000000000000..3ab02dcb97ed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll -new file mode 100644 -index 000000000000..bc085aeaa232 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll -new file mode 100644 -index 000000000000..9e7c94305630 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll -new file mode 100644 -index 000000000000..66d800470003 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll -new file mode 100644 -index 000000000000..52621ddc6f49 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll -new file mode 100644 -index 000000000000..5663e3475b12 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..2d65a75b175a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll -new file mode 100644 -index 000000000000..82da0d21d013 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll -new file mode 100644 -index 000000000000..e10d5d7bd488 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll -new file mode 100644 -index 000000000000..a928cc2de8c8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll -new file mode 100644 -index 000000000000..42cd6ac99754 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll -new file mode 100644 -index 000000000000..f050e7d79b0f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll -new file mode 100644 -index 000000000000..26be21a83aa4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll -new file mode 100644 -index 000000000000..72da2a746dd5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..cd778e2c0627 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll -new file mode 100644 -index 000000000000..a10c54329149 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll -new file mode 100644 -index 000000000000..0177f2b77b93 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvst: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049) -+ ret void -+} -+ -+define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvst: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll -new file mode 100644 -index 000000000000..c19207aad6b8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) -+ -+define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll -new file mode 100644 -index 000000000000..0ea2484e090d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll -@@ -0,0 +1,121 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll -new file mode 100644 -index 000000000000..42c7c0da1746 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) -+ -+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll -new file mode 100644 -index 000000000000..810008c17f7e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1) -+ ret <16 x i16> %res -+} -+ -+define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1) -+ ret <8 x i32> %res -+} -+ -+define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1) -+ ret <4 x i64> %res -+} -+ -+define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll -new file mode 100644 -index 000000000000..924b89ce9d6c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} -+ -+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) -+ -+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b) -+ ret <16 x i16> %res -+} -+ -+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) -+ -+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b) -+ ret <8 x i32> %res -+} -+ -+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) -+ -+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b) -+ ret <4 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll -new file mode 100644 -index 000000000000..0170d204cf42 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) -+ ret <32 x i8> %res -+} -+ -+define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) -+ ret <32 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll -new file mode 100644 -index 000000000000..1478f691a1cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s -+ -+declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) -+ -+define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) -+ ret <32 x i8> %res -+} --- -2.20.1 - - -From 7f172768f1132b99d4bacf4daf119a9154428b52 Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:15:19 +0800 -Subject: [PATCH 07/35] [LoongArch][MC] Add invalid immediate testcases for LSX - instructions - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157573 - -(cherry picked from commit 2f4b6695836e16ec075061cd2508444bd403ad7d) ---- - llvm/test/MC/LoongArch/lsx/invalid-imm.s | 1149 +++++++++++++++++++++- - 1 file changed, 1143 insertions(+), 6 deletions(-) - -diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s -index fb7e24c83488..c3f9aaa08281 100644 ---- a/llvm/test/MC/LoongArch/lsx/invalid-imm.s -+++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s -@@ -3,53 +3,1190 @@ - # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s - - ## uimm1 -+vstelm.d $vr0, $a0, 8, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vstelm.d $vr0, $a0, 8, 2 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vreplvei.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ - vreplvei.d $vr0, $vr1, 2 - # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] - -+vpickve2gr.du $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.du $a0, $vr1, 2 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.d $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] -+ -+vpickve2gr.d $a0, $vr1, 2 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] -+ -+vinsgr2vr.d $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+vinsgr2vr.d $vr0, $a0, 2 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] -+ -+## uimm2 -+vstelm.w $vr0, $a0, 4, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vstelm.w $vr0, $a0, 4, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vreplvei.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vreplvei.w $vr0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.wu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.wu $a0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.w $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+vpickve2gr.w $a0, $vr1, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+vinsgr2vr.w $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+vinsgr2vr.w $vr0, $a0, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+## uimm3 -+vstelm.h $vr0, $a0, 2, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vstelm.h $vr0, $a0, 2, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vreplvei.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vreplvei.h $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.hu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.hu $a0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.h $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vpickve2gr.h $a0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vinsgr2vr.h $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vinsgr2vr.h $vr0, $a0, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitrevi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitrevi.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitseti.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitseti.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitclri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vbitclri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+vsrari.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrari.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrlri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrlri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.hu.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.hu.bu $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.h.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vsllwil.h.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+vrotri.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vrotri.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+vsrai.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrai.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsrli.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vslli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vslli.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsat.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] -+ -+vsat.b $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] -+ -+vsat.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+vsat.bu $vr0, $vr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ - ## uimm4 -+vstelm.b $vr0, $a0, 1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vstelm.b $vr0, $a0, 1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vreplvei.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vreplvei.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.bu $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.bu $a0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.b $a0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vpickve2gr.b $a0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vinsgr2vr.b $vr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vinsgr2vr.b $vr0, $a0, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitrevi.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitrevi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitseti.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitseti.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitclri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vbitclri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrarni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlrni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.bu.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.bu.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrani.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vssrlni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrarni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrarni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrlrni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrlrni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsrani.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrani.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrlni.b.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrlni.b.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] -+ -+vsrari.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrari.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrlri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrlri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.wu.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.wu.hu $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.w.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vsllwil.w.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+vrotri.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vrotri.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+vsrai.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrai.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrli.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsrli.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vslli.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vslli.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsat.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] -+ - vsat.h $vr0, $vr1, 16 - # CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] - -+vsat.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+vsat.hu $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ -+## uimm5 -+vbsrl.v $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsrl.v $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsll.v $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vbsll.v $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslti.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslti.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vslei.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.h $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vfrstpi.b $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+vbitrevi.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitrevi.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitseti.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitseti.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitclri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vbitclri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrarni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlrni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.hu.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.hu.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrani.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vssrlni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrarni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrarni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrlrni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrlrni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsrani.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrani.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrlni.h.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrlni.h.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+vsrari.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrari.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrlri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrlri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.du.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.du.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.d.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vsllwil.d.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+vrotri.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vrotri.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsrai.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrai.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrli.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsrli.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslli.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vslli.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vaddi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsubi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmaxi.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.bu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.bu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.hu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.hu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vmini.du $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+vsat.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] -+ -+vsat.w $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] -+ -+vsat.wu $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+vsat.wu $vr0, $vr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ - ## simm5 -+vslti.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslti.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vslei.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vseqi.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ - vseqi.b $vr0, $vr1, 16 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] - -+vmaxi.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmaxi.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.b $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.b $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.h $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.h $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.w $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.w $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.d $vr0, $vr1, -17 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+vmini.d $vr0, $vr1, 16 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] -+ -+## uimm6 -+vbitrevi.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitrevi.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitseti.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitseti.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitclri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vbitclri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrarni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlrni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.wu.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.wu.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrani.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vssrlni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrarni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrarni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrlrni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrlrni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+vsrani.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrani.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrlni.w.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrlni.w.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] -+ -+vsrari.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrari.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrlri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrlri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vrotri.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vrotri.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+vsrai.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrai.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrli.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsrli.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vslli.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vslli.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsat.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] -+ -+vsat.d $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] -+ -+vsat.du $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+vsat.du $vr0, $vr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ - ## uimm7 -+vssrarni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrarni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlrni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.du.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.du.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrani.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vssrlni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrarni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrarni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrlrni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrlrni.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+vsrani.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ -+vsrani.d.q $vr0, $vr1, 128 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ -+vsrlni.d.q $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] -+ - vsrlni.d.q $vr0, $vr1, 128 - # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] - --## simm8 -+## uimm8 -+vextrins.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.d $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.w $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.h $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vextrins.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vpermi.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ - vpermi.w $vr0, $vr1, 256 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] - -+vshuf4i.d $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.d $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.w $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.w $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.h $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.h $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vshuf4i.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+vbitseli.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vbitseli.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+vandi.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vandi.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] -+ -+vori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] -+ -+vxori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vxori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vnori.b $vr0, $vr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+vnori.b $vr0, $vr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+## simm8 -+vstelm.b $vr0, $a0, -129, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] -+ -+vstelm.b $vr0, $a0, 128, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] -+ - ## simm8_lsl1 --vstelm.h $vr0, $a0, 255, 1 -+vstelm.h $vr0, $a0, -258, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] -+ -+vstelm.h $vr0, $a0, 256, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] - - ## simm8_lsl2 --vstelm.w $vr0, $a0, 512, 1 -+vstelm.w $vr0, $a0, -516, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] - --## simm10 --vrepli.b $vr0, 512 --# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+vstelm.w $vr0, $a0, 512, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] - - ## simm8_lsl3 -+vstelm.d $vr0, $a0, -1032, 1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] -+ - vstelm.d $vr0, $a0, 1024, 1 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] - - ## simm9_lsl3 -+vldrepl.d $vr0, $a0, -2056 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] -+ - vldrepl.d $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] - - ## simm10_lsl2 -+vldrepl.w $vr0, $a0, -2052 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] -+ - vldrepl.w $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] - -+## simm10 -+vrepli.b $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.b $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.h $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.h $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.w $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.w $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.d $vr0, -513 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ -+vrepli.d $vr0, 512 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] -+ - ## simm11_lsl1 -+vldrepl.h $vr0, $a0, -2050 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] -+ - vldrepl.h $vr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] - -+## simm12 -+vldrepl.b $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] -+ -+vldrepl.b $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] -+ -+vst $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vst $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vld $vr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ -+vld $vr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -+ - ## simm13 -+vldi $vr0, -4097 -+# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] -+ - vldi $vr0, 4096 - # CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] --- -2.20.1 - - -From aca10c260dfde07f2248a70e3d37770ee75e8e7a Mon Sep 17 00:00:00 2001 -From: chenli -Date: Sat, 19 Aug 2023 17:16:09 +0800 -Subject: [PATCH 08/35] [LoongArch][MC] Add invalid immediate testcases for - LASX instructions - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157574 - -(cherry picked from commit d163ae8c255f663707d4b0d5de03fcb18274b3eb) ---- - llvm/test/MC/LoongArch/lasx/invalid-imm.s | 1149 ++++++++++++++++++++- - 1 file changed, 1143 insertions(+), 6 deletions(-) - -diff --git a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s -index 5c61a7a42009..6f64a6f87802 100644 ---- a/llvm/test/MC/LoongArch/lasx/invalid-imm.s -+++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s -@@ -3,53 +3,1190 @@ - # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s - - ## uimm1 -+xvrepl128vei.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] -+ - xvrepl128vei.d $xr0, $xr1, 2 - # CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] - -+## uimm2 -+xvpickve.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve.d $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsve0.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsve0.d $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] -+ -+xvinsgr2vr.d $xr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvinsgr2vr.d $xr0, $a0, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.d $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.d $a0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.du $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] -+ -+xvpickve2gr.du $a0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] -+ -+xvstelm.d $xr0, $a0, 8, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvstelm.d $xr0, $a0, 8, 4 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] -+ -+xvrepl128vei.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] -+ -+xvrepl128vei.w $xr0, $xr1, 4 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] -+ -+## uimm3 -+xvpickve.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve.w $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsve0.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsve0.w $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] -+ -+xvinsgr2vr.w $xr0, $a0, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvinsgr2vr.w $xr0, $a0, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.wu $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.wu $a0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.w $a0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvpickve2gr.w $a0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvstelm.w $xr0, $a0, 4, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvstelm.w $xr0, $a0, 4, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvrepl128vei.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvrepl128vei.h $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvbitrevi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitrevi.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitseti.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitseti.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitclri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvbitclri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] -+ -+xvsrari.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrari.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrlri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrlri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.hu.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.hu.bu $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.h.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvsllwil.h.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] -+ -+xvrotri.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvrotri.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] -+ -+xvsrai.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrai.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsrli.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvslli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvslli.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.b $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ -+xvsat.bu $xr0, $xr1, 8 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] -+ - ## uimm4 -+xvstelm.h $xr0, $a0, 2, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvstelm.h $xr0, $a0, 2, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvrepl128vei.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvrepl128vei.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvbitrevi.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitrevi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitseti.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitseti.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitclri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvbitclri.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrarni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlrni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.bu.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.bu.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrani.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvssrlni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrarni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrarni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlrni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlrni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsrani.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrani.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlni.b.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlni.b.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] -+ -+xvsrari.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrari.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrlri.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.wu.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.wu.hu $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.w.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvsllwil.w.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] -+ -+xvrotri.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvrotri.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] -+ -+xvsrai.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrai.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrli.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsrli.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvslli.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvslli.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsat.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] -+ - xvsat.h $xr0, $xr1, 16 - # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] - -+xvsat.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+xvsat.hu $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] -+ -+## uimm5 -+xvstelm.b $xr0, $a0, 1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvstelm.b $xr0, $a0, 1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbsrl.v $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsrl.v $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsll.v $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvbsll.v $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslti.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvslei.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.h $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvfrstpi.b $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] -+ -+xvbitrevi.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitrevi.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitseti.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitseti.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitclri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvbitclri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrarni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlrni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.hu.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.hu.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvssrani.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvssrlni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrarni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrarni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlrni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlrni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsrani.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrani.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlni.h.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlni.h.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -+ -+xvsrari.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrari.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrlri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.du.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.du.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.d.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvsllwil.d.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] -+ -+xvrotri.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvrotri.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsrai.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrai.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrli.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsrli.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslli.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvslli.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvaddi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsubi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmaxi.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.bu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.bu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.hu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.hu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvmini.du $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.w $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.wu $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ -+xvsat.wu $xr0, $xr1, 32 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -+ - ## simm5 -+xvslti.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslti.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvslei.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvseqi.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ - xvseqi.b $xr0, $xr1, 16 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] - -+xvmaxi.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmaxi.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.b $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.b $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.h $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.h $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.w $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.w $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.d $xr0, $xr1, -17 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+xvmini.d $xr0, $xr1, 16 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] -+ -+## uimm6 -+xvbitrevi.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitrevi.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitseti.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitseti.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitclri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvbitclri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrarni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlrni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.wu.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.wu.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvssrani.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvssrlni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrarni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrarni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlrni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlrni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] -+ -+xvsrani.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrani.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlni.w.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlni.w.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -+ -+xvsrari.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrari.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrlri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvrotri.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvrotri.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] -+ -+xvsrai.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrai.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrli.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsrli.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvslli.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvslli.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.d $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.du $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ -+xvsat.du $xr0, $xr1, 64 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -+ - ## uimm7 -+xvssrarni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrarni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlrni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.du.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.du.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrani.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvssrlni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrarni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrarni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlrni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlrni.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] -+ -+xvsrani.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+xvsrani.d.q $xr0, $xr1, 128 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ -+xvsrlni.d.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] -+ - xvsrlni.d.q $xr0, $xr1, 128 - # CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] - --## simm8 -+## uimm8 -+xvextrins.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.w $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.h $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvextrins.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.q $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.q $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ -+xvpermi.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] -+ - xvpermi.w $xr0, $xr1, 256 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] - -+xvshuf4i.d $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.d $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.w $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.w $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.h $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.h $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvshuf4i.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] -+ -+xvbitseli.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvbitseli.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] -+ -+xvandi.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvandi.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+xvori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] -+ -+xvxori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvxori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvnori.b $xr0, $xr1, -1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+xvnori.b $xr0, $xr1, 256 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] -+ -+## simm8 -+xvstelm.b $xr0, $a0, -129, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] -+ -+xvstelm.b $xr0, $a0, 128, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] -+ - ## simm8_lsl1 --xvstelm.h $xr0, $a0, 255, 1 -+xvstelm.h $xr0, $a0, -258, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] -+ -+xvstelm.h $xr0, $a0, 256, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] - - ## simm8_lsl2 --xvstelm.w $xr0, $a0, 512, 1 -+xvstelm.w $xr0, $a0, -516, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] - --## simm10 --xvrepli.b $xr0, 512 --# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+xvstelm.w $xr0, $a0, 512, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] - - ## simm8_lsl3 -+xvstelm.d $xr0, $a0, -1032, 1 -+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] -+ - xvstelm.d $xr0, $a0, 1024, 1 - # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] - - ## simm9_lsl3 -+xvldrepl.d $xr0, $a0, -2056 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] -+ - xvldrepl.d $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] - - ## simm10_lsl2 -+xvldrepl.w $xr0, $a0, -2052 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] -+ - xvldrepl.w $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] - -+## simm10 -+xvrepli.b $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.b $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.h $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.h $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.w $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.w $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.d $xr0, -513 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ -+xvrepli.d $xr0, 512 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] -+ - ## simm11_lsl1 -+xvldrepl.h $xr0, $a0, -2050 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] -+ - xvldrepl.h $xr0, $a0, 2048 - # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] - -+## simm12 -+xvldrepl.b $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvldrepl.b $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvst $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvst $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvld $xr0, $a0, -2049 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ -+xvld $xr0, $a0, 2048 -+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -+ - ## simm13 -+xvldi $xr0, -4097 -+# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] -+ - xvldi $xr0, 4096 - # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] --- -2.20.1 - - -From 73373a6158629eb02ed9fe0e540c21ffb84a549f Mon Sep 17 00:00:00 2001 -From: chenli -Date: Mon, 21 Aug 2023 11:03:49 +0800 -Subject: [PATCH 09/35] [LoongArch] Add testcases of LSX intrinsics with - immediates - -The testcases mainly cover three situations: -- the arguments which should be immediates are non immediates. -- the immediate is out of upper limit of the argument type. -- the immediate is out of lower limit of the argument type. - -Depends on D155829 - -Reviewed By: SixWeining - -Differential Revision: https://reviews.llvm.org/D157570 - -(cherry picked from commit 0c76f46ca676ebecbdf2c9f7e8b05421a234bbed) ---- - .../lsx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-addi-non-imm.ll | 37 +++++ - .../lsx/intrinsic-andi-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-andi-non-imm.ll | 10 ++ - .../lsx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitclr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitrev-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bitseli-invalid-imm.ll | 17 +++ - .../lsx/intrinsic-bitseli-non-imm.ll | 10 ++ - .../lsx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-bitset-non-imm.ll | 37 +++++ - .../lsx/intrinsic-bsll-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-bsll-non-imm.ll | 10 ++ - .../lsx/intrinsic-bsrl-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-bsrl-non-imm.ll | 10 ++ - .../lsx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-extrins-non-imm.ll | 37 +++++ - .../lsx/intrinsic-frstp-invalid-imm.ll | 33 +++++ - .../LoongArch/lsx/intrinsic-frstp-non-imm.ll | 19 +++ - .../lsx/intrinsic-insgr2vr-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-insgr2vr-non-imm.ll | 37 +++++ - .../LoongArch/lsx/intrinsic-ld-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-ld-non-imm.ll | 10 ++ - .../lsx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ - .../LoongArch/lsx/intrinsic-ldi-non-imm.ll | 46 +++++++ - .../lsx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-ldrepl-non-imm.ll | 37 +++++ - .../lsx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-max-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-min-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-nori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-nori-non-imm.ll | 10 ++ - .../lsx/intrinsic-ori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-ori-non-imm.ll | 10 ++ - .../lsx/intrinsic-permi-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-permi-non-imm.ll | 10 ++ - .../lsx/intrinsic-pickve2gr-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-pickve2gr-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-replvei-invalid-imm.ll | 65 +++++++++ - .../lsx/intrinsic-replvei-non-imm.ll | 37 +++++ - .../lsx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-rotr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-sat-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-seq-non-imm.ll | 37 +++++ - .../lsx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-shuf4i-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-sle-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-sll-non-imm.ll | 37 +++++ - .../lsx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ - .../LoongArch/lsx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ - .../lsx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-slt-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-sra-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srani-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srar-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srarni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srl-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlr-non-imm.ll | 37 +++++ - .../lsx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-srlrni-non-imm.ll | 37 +++++ - .../lsx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ - .../LoongArch/lsx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ - .../lsx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ - .../lsx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ - .../LoongArch/lsx/intrinsic-st-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-st-non-imm.ll | 10 ++ - .../lsx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ - .../LoongArch/lsx/intrinsic-stelm-non-imm.ll | 65 +++++++++ - .../lsx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ - .../LoongArch/lsx/intrinsic-subi-non-imm.ll | 37 +++++ - .../lsx/intrinsic-xori-invalid-imm.ll | 17 +++ - .../LoongArch/lsx/intrinsic-xori-non-imm.ll | 10 ++ - 90 files changed, 4949 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll -new file mode 100644 -index 000000000000..6875872b6f83 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll -new file mode 100644 -index 000000000000..87d32b3ce02a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll -new file mode 100644 -index 000000000000..82a117b2aba5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll -new file mode 100644 -index 000000000000..c0c35c775266 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll -new file mode 100644 -index 000000000000..b020806cd86c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll -new file mode 100644 -index 000000000000..df6cdb99cdbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll -new file mode 100644 -index 000000000000..24b6ec3284cb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll -new file mode 100644 -index 000000000000..3ffb494c9907 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll -new file mode 100644 -index 000000000000..bc63b40e9fca ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll -new file mode 100644 -index 000000000000..52c1eb7d2024 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll -new file mode 100644 -index 000000000000..e57e14d8cb07 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll -new file mode 100644 -index 000000000000..9b2bde015ed9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll -new file mode 100644 -index 000000000000..eb49af49c9be ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll -new file mode 100644 -index 000000000000..5b10c9e91a4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll -new file mode 100644 -index 000000000000..bf56822e2ef5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll -new file mode 100644 -index 000000000000..0bc038c869ce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll -new file mode 100644 -index 000000000000..7f94234ed603 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll -new file mode 100644 -index 000000000000..e834002bb60b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll -new file mode 100644 -index 000000000000..0184c855c9c1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll -@@ -0,0 +1,33 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll -new file mode 100644 -index 000000000000..9583f672a305 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll -@@ -0,0 +1,19 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll -new file mode 100644 -index 000000000000..3d4f84fb6e03 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll -new file mode 100644 -index 000000000000..2a4c2218de8c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) -+ -+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) -+ -+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) -+ -+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) -+ -+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll -new file mode 100644 -index 000000000000..3aeb30ce66b4 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vld: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vld_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vld: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll -new file mode 100644 -index 000000000000..db6a0318d87a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) -+ -+define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll -new file mode 100644 -index 000000000000..57f6f8e81d91 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll -@@ -0,0 +1,81 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vldi: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vldi_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vldi: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vrepli_b_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vrepli_h_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vrepli_w_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d_lo() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vrepli_d_hi() nounwind { -+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll -new file mode 100644 -index 000000000000..a8f8278f8097 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll -@@ -0,0 +1,46 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) -+ -+define <2 x i64> @lsx_vldi(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) -+ -+define <16 x i8> @lsx_vrepli_b(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) -+ -+define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) -+ -+define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) -+ -+define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll -new file mode 100644 -index 000000000000..cb640e1245da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll -new file mode 100644 -index 000000000000..e60b21913c69 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) -+ -+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) -+ -+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) -+ -+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) -+ -+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll -new file mode 100644 -index 000000000000..667ba32723fc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll -new file mode 100644 -index 000000000000..34bbe3495670 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll -new file mode 100644 -index 000000000000..b73bada4f06f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll -new file mode 100644 -index 000000000000..5d9b98cec4d0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll -new file mode 100644 -index 000000000000..8c59d8fb9fa5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll -new file mode 100644 -index 000000000000..322a39c106a6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll -new file mode 100644 -index 000000000000..4a7fc7e109d9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll -new file mode 100644 -index 000000000000..5644b8581dce ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll -new file mode 100644 -index 000000000000..e439bbae6130 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll -new file mode 100644 -index 000000000000..bdfc08ed680a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll -new file mode 100644 -index 000000000000..3430c54d2194 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) -+ ret i32 %res -+} -+ -+define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) -+ ret i64 %res -+} -+ -+define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll -new file mode 100644 -index 000000000000..6dd3c1f27a81 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) -+ ret i64 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) -+ -+define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) -+ -+define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) -+ -+define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) -+ ret i32 %res -+} -+ -+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) -+ -+define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) -+ ret i64 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll -new file mode 100644 -index 000000000000..d625441122a6 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll -new file mode 100644 -index 000000000000..3d271bb2b307 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll -new file mode 100644 -index 000000000000..3c53b36672ad ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vrotri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll -new file mode 100644 -index 000000000000..fd8ba3a1c633 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll -new file mode 100644 -index 000000000000..45fa4e43be19 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll -new file mode 100644 -index 000000000000..afdbe0c1ce0b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll -new file mode 100644 -index 000000000000..220398ff28cd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll -new file mode 100644 -index 000000000000..5fa1dd30475c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll -new file mode 100644 -index 000000000000..4d6fadf08c26 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll -new file mode 100644 -index 000000000000..a7d138bcc00b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll -new file mode 100644 -index 000000000000..4c945e296711 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll -new file mode 100644 -index 000000000000..0fc137bf0549 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll -new file mode 100644 -index 000000000000..75406f94887c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll -new file mode 100644 -index 000000000000..7474b5e29734 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll -new file mode 100644 -index 000000000000..bda3523a0b5c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll -@@ -0,0 +1,97 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll -new file mode 100644 -index 000000000000..a03656d5ca07 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll -@@ -0,0 +1,55 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) -+ -+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) -+ -+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) -+ -+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll -new file mode 100644 -index 000000000000..f6d014b19d6c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll -new file mode 100644 -index 000000000000..9a8b757dab4e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll -new file mode 100644 -index 000000000000..2a033a21b565 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll -new file mode 100644 -index 000000000000..c3b328145864 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll -new file mode 100644 -index 000000000000..d68064e9b902 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll -new file mode 100644 -index 000000000000..38cfde214dc1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll -new file mode 100644 -index 000000000000..b6c2d70cebbc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll -new file mode 100644 -index 000000000000..2ad8adcd823b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll -new file mode 100644 -index 000000000000..d24cf92a0392 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll -new file mode 100644 -index 000000000000..19de7445cba1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll -new file mode 100644 -index 000000000000..3beff790afab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll -new file mode 100644 -index 000000000000..98652aca0d62 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll -new file mode 100644 -index 000000000000..054c4f393548 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll -new file mode 100644 -index 000000000000..76341df197fd ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll -new file mode 100644 -index 000000000000..bcbd38e26e5f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll -new file mode 100644 -index 000000000000..4862b1546ccf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..8988ae88f9eb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll -new file mode 100644 -index 000000000000..e5530db56fed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll -new file mode 100644 -index 000000000000..f7817921ebeb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll -new file mode 100644 -index 000000000000..a80ede9c5243 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll -new file mode 100644 -index 000000000000..4edda8c0a24a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll -new file mode 100644 -index 000000000000..a77e6e764c9d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll -new file mode 100644 -index 000000000000..6218af1fa773 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll -new file mode 100644 -index 000000000000..688be826f467 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll -new file mode 100644 -index 000000000000..98a0c5b3cd28 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll -@@ -0,0 +1,129 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { -+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll -new file mode 100644 -index 000000000000..c389b4fd6023 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll -@@ -0,0 +1,73 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -+ -+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) -+ -+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) -+ -+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) -+ -+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) -+ -+define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll -new file mode 100644 -index 000000000000..64518380964b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vst: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049) -+ ret void -+} -+ -+define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vst: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll -new file mode 100644 -index 000000000000..119ed9b78658 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) -+ -+define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll -new file mode 100644 -index 000000000000..277abcbd34cc ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll -@@ -0,0 +1,121 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15) -+ ret void -+} -+ -+define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7) -+ ret void -+} -+ -+define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3) -+ ret void -+} -+ -+define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind { -+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll -new file mode 100644 -index 000000000000..f53932f79035 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) -+ -+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) -+ -+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) -+ -+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b) -+ ret void -+} -+ -+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) -+ -+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1) -+ ret void -+} -+ -+define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll -new file mode 100644 -index 000000000000..96cc1241fbf3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll -@@ -0,0 +1,65 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll -new file mode 100644 -index 000000000000..162f9ad131c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll -@@ -0,0 +1,37 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} -+ -+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) -+ -+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b) -+ ret <8 x i16> %res -+} -+ -+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) -+ -+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b) -+ ret <4 x i32> %res -+} -+ -+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) -+ -+define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b) -+ ret <2 x i64> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll -new file mode 100644 -index 000000000000..5f5613189ac8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll -@@ -0,0 +1,17 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { -+; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) -+ ret <16 x i8> %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll -new file mode 100644 -index 000000000000..4238d89120f1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll -@@ -0,0 +1,10 @@ -+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s -+ -+declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) -+ -+define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { -+; CHECK: immarg operand has non-immediate parameter -+entry: -+ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) -+ ret <16 x i8> %res -+} --- -2.20.1 - - -From 270c5590dbdb266e6df5bec97b38594824d9815a Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 23 Aug 2023 15:28:00 +0800 -Subject: [PATCH 10/35] [LoongArch] Reorder LoongArchTargetLowering(). NFC - -(cherry picked from commit 3693909ca47f1fafc97b441c91f5656acdd3907c) - -[LoongArch] Fix Subtarget.is64Bit - -(cherry picked from commit 749f36dae311000e1d69351707f4f24a72090c94) ---- - .../LoongArch/LoongArchISelLowering.cpp | 152 ++++++++++-------- - 1 file changed, 82 insertions(+), 70 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3a40cd06a3eb..2f8ce57d3f5f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -47,20 +47,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - : TargetLowering(TM), Subtarget(STI) { - - MVT GRLenVT = Subtarget.getGRLenVT(); -+ - // Set up the register classes. -+ - addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); - if (Subtarget.hasBasicF()) - addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); - if (Subtarget.hasBasicD()) - addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); -- if (Subtarget.hasExtLSX()) -- for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, -- MVT::v2i64}) -- addRegisterClass(VT, &LoongArch::LSX128RegClass); -- if (Subtarget.hasExtLASX()) -- for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32, -- MVT::v4i64}) -- addRegisterClass(VT, &LoongArch::LASX256RegClass); - - static const MVT::SimpleValueType LSXVTs[] = { - MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; -@@ -75,38 +69,57 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - for (MVT VT : LASXVTs) - addRegisterClass(VT, &LoongArch::LASX256RegClass); - -+ // Set operations for LA32 and LA64. -+ - setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, - MVT::i1, Promote); - -- // TODO: add necessary setOperationAction calls later. - setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); - setOperationAction(ISD::ROTL, GRLenVT, Expand); - setOperationAction(ISD::CTPOP, GRLenVT, Expand); -- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); -- setOperationAction(ISD::TRAP, MVT::Other, Legal); -- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - - setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, -- ISD::JumpTable}, -+ ISD::JumpTable, ISD::GlobalTLSAddress}, - GRLenVT, Custom); - -- setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); -- -- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); -- -- setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); -- if (Subtarget.is64Bit()) -- setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); -+ setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); - - setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); - setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); - -+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); -+ setOperationAction(ISD::TRAP, MVT::Other, Legal); -+ -+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); -+ -+ // Expand bitreverse.i16 with native-width bitrev and shift for now, before -+ // we get to know which of sll and revb.2h is faster. -+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); -+ setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); -+ -+ // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and -+ // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 -+ // and i32 could still be byte-swapped relatively cheaply. -+ setOperationAction(ISD::BSWAP, MVT::i16, Custom); -+ -+ setOperationAction(ISD::BR_JT, MVT::Other, Expand); -+ setOperationAction(ISD::BR_CC, GRLenVT, Expand); -+ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); -+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); -+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); -+ -+ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); -+ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); -+ -+ // Set operations for LA64 only. -+ - if (Subtarget.is64Bit()) { - setOperationAction(ISD::SHL, MVT::i32, Custom); - setOperationAction(ISD::SRA, MVT::i32, Custom); -@@ -117,50 +130,39 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::ROTL, MVT::i32, Custom); - setOperationAction(ISD::CTTZ, MVT::i32, Custom); - setOperationAction(ISD::CTLZ, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); -+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); -- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) -- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); -- if (Subtarget.hasBasicF()) -- setOperationAction(ISD::FRINT, MVT::f32, Legal); -- if (Subtarget.hasBasicD()) -- setOperationAction(ISD::FRINT, MVT::f64, Legal); -- } -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); - -- // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and -- // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 -- // and i32 could still be byte-swapped relatively cheaply. -- setOperationAction(ISD::BSWAP, MVT::i16, Custom); -- if (Subtarget.is64Bit()) { -+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BSWAP, MVT::i32, Custom); - } - -- // Expand bitreverse.i16 with native-width bitrev and shift for now, before -- // we get to know which of sll and revb.2h is faster. -- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); -- if (Subtarget.is64Bit()) { -- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); -- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); -- } else { -- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); -+ // Set operations for LA32 only. -+ -+ if (!Subtarget.is64Bit()) { - setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); - setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); -- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); -+ -+ // Set libcalls. -+ setLibcallName(RTLIB::MUL_I128, nullptr); - } - - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; - -+ // Set operations for 'F' feature. -+ - if (Subtarget.hasBasicF()) { - setCondCodeAction(FPCCToExpand, MVT::f32, Expand); -+ - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f32, Legal); -@@ -173,14 +175,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f32, Expand); -+ -+ if (Subtarget.is64Bit()) -+ setOperationAction(ISD::FRINT, MVT::f32, Legal); -+ -+ if (!Subtarget.hasBasicD()) { -+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); -+ if (Subtarget.is64Bit()) { -+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); -+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); -+ } -+ } - } -+ -+ // Set operations for 'D' feature. -+ - if (Subtarget.hasBasicD()) { -+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); -+ setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setCondCodeAction(FPCCToExpand, MVT::f64, Expand); -+ - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); - setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); -- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); - setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); -@@ -189,35 +207,35 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); -- setTruncStoreAction(MVT::f64, MVT::f32, Expand); -- } -- -- setOperationAction(ISD::BR_JT, MVT::Other, Expand); - -- setOperationAction(ISD::BR_CC, GRLenVT, Expand); -- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); -- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); -- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); -- if (!Subtarget.is64Bit()) -- setLibcallName(RTLIB::MUL_I128, nullptr); -- -- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); -- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); -- if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && -- !Subtarget.hasBasicD())) { -- setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); -- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); -+ if (Subtarget.is64Bit()) -+ setOperationAction(ISD::FRINT, MVT::f64, Legal); - } - -+ // Set operations for 'LSX' feature. -+ - if (Subtarget.hasExtLSX()) - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); - -+ // Set operations for 'LASX' feature. -+ - if (Subtarget.hasExtLASX()) - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, - {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, - Legal); - -+ // Set DAG combine for LA32 and LA64. -+ -+ setTargetDAGCombine(ISD::AND); -+ setTargetDAGCombine(ISD::OR); -+ setTargetDAGCombine(ISD::SRL); -+ -+ // Set DAG combine for 'LSX' feature. -+ -+ if (Subtarget.hasExtLSX()) -+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); -+ - // Compute derived properties from the register classes. - computeRegisterProperties(Subtarget.getRegisterInfo()); - -@@ -235,12 +253,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); - setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); - setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); -- -- setTargetDAGCombine(ISD::AND); -- setTargetDAGCombine(ISD::OR); -- setTargetDAGCombine(ISD::SRL); -- if (Subtarget.hasExtLSX()) -- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - } - - bool LoongArchTargetLowering::isOffsetFoldingLegal( --- -2.20.1 - - -From 9b554aa98f070e4fdbf2a76cca811db411ec3312 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 29 Aug 2023 19:16:20 +0800 -Subject: [PATCH 11/35] [LoongArch] Fix typos. NFC - -(cherry picked from commit 30b6b27385f8ddc550df54a097434a121ae56d12) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 52 +++++++++---------- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 +++++++++--------- - 2 files changed, 51 insertions(+), 51 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a3afd4789dfc..947950be2b8f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1545,10 +1545,10 @@ foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", - // Pat<(Intrinsic timm:$imm) - // (LAInst timm:$imm)>; - def : Pat<(int_loongarch_lasx_xvldi timm:$imm), -- (XVLDI (to_valide_timm timm:$imm))>; -+ (XVLDI (to_valid_timm timm:$imm))>; - foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in - def : Pat<(deriveLASXIntrinsic.ret timm:$imm), -- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xj, timm:$imm) -@@ -1558,25 +1558,25 @@ foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", - "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", - "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in - def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", - "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", - "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", - "XVREPL128VEI_H", "XVSHUF4I_H"] in - def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", - "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", - "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", - "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in - def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", - "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", - "XVPICKVE2GR_D", "XVPICKVE2GR_DU", - "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in - def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), -- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) -@@ -1588,7 +1588,7 @@ foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", - def : Pat<(deriveLASXIntrinsic.ret - (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", - "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", - "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", -@@ -1596,7 +1596,7 @@ foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", - def : Pat<(deriveLASXIntrinsic.ret - (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", - "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", - "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", -@@ -1604,7 +1604,7 @@ foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", - def : Pat<(deriveLASXIntrinsic.ret - (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", - "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", - "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", -@@ -1612,7 +1612,7 @@ foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", - def : Pat<(deriveLASXIntrinsic.ret - (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), - (!cast(Inst) LASX256:$xd, LASX256:$xj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - - // vty: v32i8/v16i16/v8i32/v4i64 - // Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), -@@ -1693,42 +1693,42 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ - (!cast(Inst) LASX256:$xj)>; - - def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), -- (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; -+ (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), -- (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; -+ (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; - - // load - def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), -- (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), - (XVLDX GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), -- (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), -- (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), -- (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), -- (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; - - // store - def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), -- (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; -+ (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), - (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), -- (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - - } // Predicates = [HasExtLASX] -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 13332be0bc38..e021adcecf4d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -145,7 +145,7 @@ def lsxsplati32 : PatFrag<(ops node:$e0), - def lsxsplati64 : PatFrag<(ops node:$e0), - (v2i64 (build_vector node:$e0, node:$e0))>; - --def to_valide_timm : SDNodeXForm(N); - return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); - }]>; -@@ -1639,10 +1639,10 @@ foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", - // Pat<(Intrinsic timm:$imm) - // (LAInst timm:$imm)>; - def : Pat<(int_loongarch_lsx_vldi timm:$imm), -- (VLDI (to_valide_timm timm:$imm))>; -+ (VLDI (to_valid_timm timm:$imm))>; - foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in - def : Pat<(deriveLSXIntrinsic.ret timm:$imm), -- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; -+ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vj, timm:$imm) -@@ -1652,25 +1652,25 @@ foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", - "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", - "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in - def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", - "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", - "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", - "VREPLVEI_H", "VSHUF4I_H"] in - def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", - "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", - "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", - "VREPLVEI_W", "VSHUF4I_W"] in - def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", - "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", - "VPICKVE2GR_D", "VPICKVE2GR_DU", - "VREPLVEI_D"] in - def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), -- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; -+ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) -@@ -1682,7 +1682,7 @@ foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", - def : Pat<(deriveLSXIntrinsic.ret - (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", - "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", - "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", -@@ -1690,7 +1690,7 @@ foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", - def : Pat<(deriveLSXIntrinsic.ret - (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", - "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", - "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", -@@ -1698,7 +1698,7 @@ foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", - def : Pat<(deriveLSXIntrinsic.ret - (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", - "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", - "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", -@@ -1706,7 +1706,7 @@ foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", - def : Pat<(deriveLSXIntrinsic.ret - (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), - (!cast(Inst) LSX128:$vd, LSX128:$vj, -- (to_valide_timm timm:$imm))>; -+ (to_valid_timm timm:$imm))>; - - // vty: v16i8/v8i16/v4i32/v2i64 - // Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), -@@ -1788,36 +1788,36 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", - - // load - def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), -- (VLD GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLD GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), - (VLDX GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), -- (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), -- (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), -- (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), -- (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; - - // store - def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), -- (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; -+ (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; - def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), - (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; - - def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), -- (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), -- (to_valide_timm timm:$idx))>; -+ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), -+ (to_valid_timm timm:$idx))>; - - } // Predicates = [HasExtLSX] --- -2.20.1 - - -From 14892c2a03810b1e01aa62e8a5f12e4f4272bf23 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 24 Oct 2023 15:46:56 +0800 -Subject: [PATCH 12/35] [LoongArch] Set some operations action for LSX and LASX - -First, expand all truncationg stores and extending loads. Second, -expand everything for `fixedlen_vector_valuetypes`. Finally, we -selectively turn on ones that can be effectively codegen'd. - -Simultaneously, this patch adds floating-point vector types to -load/store patterns. Additional test cases will be included in the IR -instruction test patchs. - -(cherry picked from commit f2441a06c609cedbb7e11303907f07bf0ca5cb2f) ---- - .../LoongArch/LoongArchISelLowering.cpp | 74 +++++++++++++++++-- - .../LoongArch/LoongArchLASXInstrInfo.td | 2 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 2 +- - 3 files changed, 69 insertions(+), 9 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 2f8ce57d3f5f..d3627cec2e8c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -214,16 +214,76 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // Set operations for 'LSX' feature. - -- if (Subtarget.hasExtLSX()) -- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -- {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); -+ if (Subtarget.hasExtLSX()) { -+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) { -+ // Expand all truncating stores and extending loads. -+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { -+ setTruncStoreAction(VT, InnerVT, Expand); -+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); -+ } -+ // By default everything must be expanded. Then we will selectively turn -+ // on ones that can be effectively codegen'd. -+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) -+ setOperationAction(Op, VT, Expand); -+ } -+ -+ for (MVT VT : LSXVTs) { -+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); -+ setOperationAction(ISD::BITCAST, VT, Legal); -+ setOperationAction(ISD::UNDEF, VT, Legal); -+ -+ // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it -+ // will be `Custom` handled in the future. -+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ } -+ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, -+ Legal); -+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, -+ VT, Legal); -+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); -+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -+ setOperationAction(ISD::CTPOP, VT, Legal); -+ } -+ for (MVT VT : {MVT::v4f32, MVT::v2f64}) { -+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -+ setOperationAction(ISD::FMA, VT, Legal); -+ } -+ } - - // Set operations for 'LASX' feature. - -- if (Subtarget.hasExtLASX()) -- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, -- {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, -- Legal); -+ if (Subtarget.hasExtLASX()) { -+ for (MVT VT : LASXVTs) { -+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); -+ setOperationAction(ISD::BITCAST, VT, Legal); -+ setOperationAction(ISD::UNDEF, VT, Legal); -+ -+ // FIXME: Same as above. -+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ } -+ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -+ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -+ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, -+ Legal); -+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, -+ VT, Legal); -+ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); -+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -+ setOperationAction(ISD::CTPOP, VT, Legal); -+ } -+ for (MVT VT : {MVT::v8f32, MVT::v4f64}) { -+ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -+ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -+ setOperationAction(ISD::FMA, VT, Legal); -+ } -+ } - - // Set DAG combine for LA32 and LA64. - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 947950be2b8f..e19aa92266b1 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1394,7 +1394,7 @@ def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - - // Loads/Stores --foreach vt = [v32i8, v16i16, v8i32, v4i64] in { -+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - defm : LdPat; - def : RegRegLdPat; - defm : StPat; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index e021adcecf4d..9391b1a8a20c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1494,7 +1494,7 @@ def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), - (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; - - // Loads/Stores --foreach vt = [v16i8, v8i16, v4i32, v2i64] in { -+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - defm : LdPat; - def : RegRegLdPat; - defm : StPat; --- -2.20.1 - - -From 85d34e0b7e9947dda7ea981aa1dc10714fd44de5 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 30 Nov 2023 17:29:18 +0800 -Subject: [PATCH 13/35] [LoongArch] Add codegen support for extractelement - (#73759) - -Add codegen support for extractelement when enable `lsx` or `lasx` -feature. - -(cherry picked from commit b72456120f1db38ed7068fb592fcf768c6d5cce2) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../Target/LoongArch/LoongArchInstrInfo.cpp | 8 + - .../LoongArch/LoongArchLASXInstrInfo.td | 38 ++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 38 ++++ - .../lasx/ir-instruction/extractelement.ll | 172 ++++++++++++++++++ - .../lsx/ir-instruction/extractelement.ll | 170 +++++++++++++++++ - 6 files changed, 428 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index d3627cec2e8c..26e94a53b344 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // will be `Custom` handled in the future. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -267,6 +268,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // FIXME: Same as above. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index ddd1c9943fac..6576100d3b32 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -90,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - Opc = LoongArch::FMOV_S; - } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { - Opc = LoongArch::FMOV_D; -+ } else if (LoongArch::GPRRegClass.contains(DstReg) && -+ LoongArch::FPR32RegClass.contains(SrcReg)) { -+ // FPR32 -> GPR copies -+ Opc = LoongArch::MOVFR2GR_S; -+ } else if (LoongArch::GPRRegClass.contains(DstReg) && -+ LoongArch::FPR64RegClass.contains(SrcReg)) { -+ // FPR64 -> GPR copies -+ Opc = LoongArch::MOVFR2GR_D; - } else { - // TODO: support other copies. - llvm_unreachable("Impossible reg-to-reg copy"); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index e19aa92266b1..380206ddcf10 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1401,6 +1401,44 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - def : RegRegStPat; - } - -+// Vector extraction with constant index. -+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), -+ (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; -+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), -+ (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), -+ (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), -+ (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), -+ (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), -+ (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; -+ -+// Vector extraction with variable index. -+def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 24))>; -+def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 16))>; -+def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), -+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), -+ sub_32)), -+ GPR)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), -+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), -+ sub_64)), -+ GPR)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), -+ (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), -+ (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 9391b1a8a20c..980870e34503 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1501,6 +1501,44 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - def : RegRegStPat; - } - -+// Vector extraction with constant index. -+def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), -+ (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; -+def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), -+ (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), -+ (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; -+def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), -+ (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; -+def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)), -+ (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>; -+def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), -+ (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; -+ -+// Vector extraction with variable index. -+def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 24))>; -+def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), -+ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, -+ i64:$rk), -+ sub_32)), -+ GPR), (i64 16))>; -+def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), -+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), -+ sub_32)), -+ GPR)>; -+def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), -+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), -+ sub_64)), -+ GPR)>; -+def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), -+ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; -+def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), -+ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -new file mode 100644 -index 000000000000..78f584cd09a8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -@@ -0,0 +1,172 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @extract_32xi8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_32xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %e = extractelement <32 x i8> %v, i32 1 -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_16xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %e = extractelement <16 x i16> %v, i32 1 -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %e = extractelement <8 x i32> %v, i32 1 -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %e = extractelement <4 x i64> %v, i32 1 -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: ori $a0, $zero, 7 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %e = extractelement <8 x float> %v, i32 7 -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: ori $a0, $zero, 3 -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %e = extractelement <4 x double> %v, i32 3 -+ store double %e, ptr %dst -+ ret void -+} -+ -+define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_32xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %e = extractelement <32 x i8> %v, i32 %idx -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_16xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %e = extractelement <16 x i16> %v, i32 %idx -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %e = extractelement <8 x i32> %v, i32 %idx -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %e = extractelement <4 x i64> %v, i32 %idx -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %e = extractelement <8 x float> %v, i32 %idx -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %e = extractelement <4 x double> %v, i32 %idx -+ store double %e, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll -new file mode 100644 -index 000000000000..b8798c97861e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll -@@ -0,0 +1,170 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @extract_16xi8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_16xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %e = extractelement <16 x i8> %v, i32 1 -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_8xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %e = extractelement <8 x i16> %v, i32 1 -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 1 -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xi64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_2xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %e = extractelement <2 x i64> %v, i32 1 -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_4xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %e = extractelement <4 x float> %v, i32 1 -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract_2xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %e = extractelement <2 x double> %v, i32 1 -+ store double %e, ptr %dst -+ ret void -+} -+ -+define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_16xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %e = extractelement <16 x i8> %v, i32 %idx -+ store i8 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_8xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %e = extractelement <8 x i16> %v, i32 %idx -+ store i16 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 %idx -+ store i32 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_2xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 -+; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %e = extractelement <2 x i64> %v, i32 %idx -+ store i64 %e, ptr %dst -+ ret void -+} -+ -+define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_4xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 -+; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %e = extractelement <4 x float> %v, i32 %idx -+ store float %e, ptr %dst -+ ret void -+} -+ -+define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { -+; CHECK-LABEL: extract_2xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 -+; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %e = extractelement <2 x double> %v, i32 %idx -+ store double %e, ptr %dst -+ ret void -+} --- -2.20.1 - - -From eb1dc17f9111c2bf2d20d366a9b46c4bda0606f6 Mon Sep 17 00:00:00 2001 -From: leecheechen -Date: Thu, 30 Nov 2023 21:41:18 +0800 -Subject: [PATCH 14/35] [LoongArch] Add some binary IR instructions testcases - for LSX (#73929) - -The IR instructions include: -- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv -- Bitwise Binary Operations: shl lshr ashr - -(cherry picked from commit 29a0f3ec2b47630ce229953fe7250e741b6c10b6) ---- - .../LoongArch/lsx/ir-instruction/add.ll | 122 +++++++++ - .../LoongArch/lsx/ir-instruction/ashr.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/fadd.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fdiv.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fmul.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/fsub.ll | 34 +++ - .../LoongArch/lsx/ir-instruction/lshr.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/mul.ll | 242 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/sdiv.ll | 134 ++++++++++ - .../LoongArch/lsx/ir-instruction/shl.ll | 178 +++++++++++++ - .../LoongArch/lsx/ir-instruction/sub.ll | 122 +++++++++ - .../LoongArch/lsx/ir-instruction/udiv.ll | 122 +++++++++ - 12 files changed, 1412 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll -new file mode 100644 -index 000000000000..2a7c37c2ae34 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = add <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = add <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = add <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = add <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v16i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v16i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = add <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v8i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v8i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = add <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = add <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v2i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v2i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = add <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll -new file mode 100644 -index 000000000000..fbc570d77ba8 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = ashr <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = ashr <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = ashr <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = ashr <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = ashr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = ashr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = ashr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = ashr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = ashr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = ashr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = ashr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = ashr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll -new file mode 100644 -index 000000000000..1fa1f611c4a3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fadd <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fadd <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -new file mode 100644 -index 000000000000..eb7c8bd9616e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fdiv <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fdiv <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll -new file mode 100644 -index 000000000000..e7fb527f7805 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fmul <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fmul <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll -new file mode 100644 -index 000000000000..df98182321da ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = fsub <4 x float> %v0, %v1 -+ store <4 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = fsub <2 x double> %v0, %v1 -+ store <2 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll -new file mode 100644 -index 000000000000..dada52f93060 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = lshr <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = lshr <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = lshr <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = lshr <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = lshr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = lshr <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = lshr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = lshr <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = lshr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = lshr <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = lshr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = lshr <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -new file mode 100644 -index 000000000000..5060240cd8b1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -@@ -0,0 +1,242 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = mul <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = mul <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = mul <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = mul <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, %v0 -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, %v0 -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, %v0 -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, %v0 -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i8_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.b $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = mul <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i16_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = mul <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i32_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = mul <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v2i64_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ori $a2, $zero, 17 -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = mul <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll -new file mode 100644 -index 000000000000..b68f73a74913 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll -@@ -0,0 +1,134 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = sdiv <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = sdiv <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = sdiv <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = sdiv <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.b $vr1, $vr0, 7 -+; CHECK-NEXT: vsrli.b $vr1, $vr1, 5 -+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = sdiv <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.h $vr1, $vr0, 15 -+; CHECK-NEXT: vsrli.h $vr1, $vr1, 13 -+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = sdiv <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.w $vr1, $vr0, 31 -+; CHECK-NEXT: vsrli.w $vr1, $vr1, 29 -+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = sdiv <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrai.d $vr1, $vr0, 63 -+; CHECK-NEXT: vsrli.d $vr1, $vr1, 61 -+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -+; CHECK-NEXT: vsrai.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = sdiv <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll -new file mode 100644 -index 000000000000..fa0aebaf28b3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = shl <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = shl <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = shl <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = shl <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = shl <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.b $vr0, $vr0, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = shl <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = shl <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = shl <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = shl <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = shl <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v2i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = shl <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v2i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslli.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = shl <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll -new file mode 100644 -index 000000000000..25b4623a47d1 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = sub <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = sub <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = sub <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = sub <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v16i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = sub <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v8i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = sub <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v4i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = sub <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v2i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = sub <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll -new file mode 100644 -index 000000000000..abb60b91dd48 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = udiv <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = udiv <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = udiv <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = udiv <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v16i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.b $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = udiv <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v8i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.h $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = udiv <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v4i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.w $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = udiv <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v2i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vsrli.d $vr0, $vr0, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = udiv <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 30b414d9f2eb968e9f4cc6ffc76389a6f93b2907 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 16 Nov 2023 20:05:01 +0800 -Subject: [PATCH 15/35] [LoongArch] Add codegen support for insertelement - -(cherry picked from commit f2cbd1fdf702afe31d0198c9185e08dc2b104252) ---- - .../LoongArch/LoongArchISelLowering.cpp | 82 +++++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 18 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 5 + - .../lasx/ir-instruction/insertelement.ll | 276 ++++++++++++++++++ - .../lsx/ir-instruction/insertelement.ll | 196 +++++++++++++ - 6 files changed, 576 insertions(+), 2 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 26e94a53b344..492339ce2151 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -237,7 +237,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it - // will be `Custom` handled in the future. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -@@ -267,7 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // FIXME: Same as above. - setOperationAction(ISD::BUILD_VECTOR, VT, Legal); -- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -@@ -369,10 +369,20 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerRETURNADDR(Op, DAG); - case ISD::WRITE_REGISTER: - return lowerWRITE_REGISTER(Op, DAG); -+ case ISD::INSERT_VECTOR_ELT: -+ return lowerINSERT_VECTOR_ELT(Op, DAG); - } - return SDValue(); - } - -+SDValue -+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ if (isa(Op->getOperand(2))) -+ return Op; -+ return SDValue(); -+} -+ - SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, - SelectionDAG &DAG) const { - -@@ -3040,6 +3050,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, - return SinkBB; - } - -+static MachineBasicBlock * -+emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, -+ const LoongArchSubtarget &Subtarget) { -+ unsigned InsOp; -+ unsigned HalfSize; -+ switch (MI.getOpcode()) { -+ default: -+ llvm_unreachable("Unexpected opcode"); -+ case LoongArch::PseudoXVINSGR2VR_B: -+ HalfSize = 16; -+ InsOp = LoongArch::VINSGR2VR_B; -+ break; -+ case LoongArch::PseudoXVINSGR2VR_H: -+ HalfSize = 8; -+ InsOp = LoongArch::VINSGR2VR_H; -+ break; -+ } -+ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -+ const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; -+ const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; -+ DebugLoc DL = MI.getDebugLoc(); -+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); -+ // XDst = vector_insert XSrc, Elt, Idx -+ Register XDst = MI.getOperand(0).getReg(); -+ Register XSrc = MI.getOperand(1).getReg(); -+ Register Elt = MI.getOperand(2).getReg(); -+ unsigned Idx = MI.getOperand(3).getImm(); -+ -+ Register ScratchReg1 = XSrc; -+ if (Idx >= HalfSize) { -+ ScratchReg1 = MRI.createVirtualRegister(RC); -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) -+ .addReg(XSrc) -+ .addReg(XSrc) -+ .addImm(1); -+ } -+ -+ Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); -+ Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) -+ .addReg(ScratchReg1, 0, LoongArch::sub_128); -+ BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) -+ .addReg(ScratchSubReg1) -+ .addReg(Elt) -+ .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx); -+ -+ Register ScratchReg2 = XDst; -+ if (Idx >= HalfSize) -+ ScratchReg2 = MRI.createVirtualRegister(RC); -+ -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) -+ .addImm(0) -+ .addReg(ScratchSubReg2) -+ .addImm(LoongArch::sub_128); -+ -+ if (Idx >= HalfSize) -+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) -+ .addReg(XSrc) -+ .addReg(ScratchReg2) -+ .addImm(2); -+ -+ MI.eraseFromParent(); -+ return BB; -+} -+ - MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); -@@ -3095,6 +3170,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - case LoongArch::PseudoXVBNZ_W: - case LoongArch::PseudoXVBNZ_D: - return emitVecCondBranchPseudo(MI, BB, Subtarget); -+ case LoongArch::PseudoXVINSGR2VR_B: -+ case LoongArch::PseudoXVINSGR2VR_H: -+ return emitPseudoXVINSGR2VR(MI, BB, Subtarget); - } - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 7765057ebffb..29028ff963d0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -275,6 +275,7 @@ private: - SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 380206ddcf10..475565db15c9 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1065,6 +1065,13 @@ def PseudoXVBZ_W : VecCond; - def PseudoXVBZ_D : VecCond; - def PseudoXVBZ : VecCond; - -+let usesCustomInserter = 1, Constraints = "$xd = $dst" in { -+def PseudoXVINSGR2VR_B -+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>; -+def PseudoXVINSGR2VR_H -+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; -+} // usesCustomInserter = 1, Constraints = "$xd = $dst" -+ - } // Predicates = [HasExtLASX] - - multiclass PatXr { -@@ -1365,12 +1372,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// PseudoXVINSGR2VR_{B/H} -+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), -+ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; -+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm), -+ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>; -+ - // XVINSGR2VR_{W/D} - def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), - (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; - def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), - (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; - -+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm), -+ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>; -+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm), -+ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>; -+ - // XVPICKVE2GR_W[U] - def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), - (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 980870e34503..d8fd132a1c59 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1462,6 +1462,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), - def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), - (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; - -+def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), -+ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; -+def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), -+ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>; -+ - // VPICKVE2GR_{B/H/W}[U] - def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), - (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll -new file mode 100644 -index 000000000000..e571a5d2e4cf ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll -@@ -0,0 +1,276 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind { -+; CHECK-LABEL: insert_32xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 1 -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind { -+; CHECK-LABEL: insert_32xi8_upper: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 16 -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind { -+; CHECK-LABEL: insert_16xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 1 -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind { -+; CHECK-LABEL: insert_16xi16_upper: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 8 -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind { -+; CHECK-LABEL: insert_8xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %v_new = insertelement <8 x i32> %v, i32 %in, i32 1 -+ store <8 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind { -+; CHECK-LABEL: insert_4xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 1 -+ store <4 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind { -+; CHECK-LABEL: insert_8xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.s $a2, $fa0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %v_new = insertelement <8 x float> %v, float %in, i32 1 -+ store <8 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind { -+; CHECK-LABEL: insert_4xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.d $a2, $fa0 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %v_new = insertelement <4 x double> %v, double %in, i32 1 -+ store <4 x double> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_32xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 -+; CHECK-NEXT: st.b $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <32 x i8>, ptr %src -+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx -+ store <32 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_16xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 -+; CHECK-NEXT: st.h $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i16>, ptr %src -+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx -+ store <16 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 -+; CHECK-NEXT: st.w $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i32>, ptr %src -+ %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx -+ store <8 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a4, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a4, 5 -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 -+; CHECK-NEXT: st.d $a2, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i64>, ptr %src -+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx -+ store <4 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: fst.s $fa0, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x float>, ptr %src -+ %v_new = insertelement <8 x float> %v, float %in, i32 %idx -+ store <8 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: fst.d $fa0, $a0, 0 -+; CHECK-NEXT: xvld $xr0, $sp, 0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x double>, ptr %src -+ %v_new = insertelement <4 x double> %v, double %in, i32 %idx -+ store <4 x double> %v_new, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll -new file mode 100644 -index 000000000000..a9834591aa0e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll -@@ -0,0 +1,196 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { -+; CHECK-LABEL: insert_16xi8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 -+ store <16 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { -+; CHECK-LABEL: insert_8xi16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 -+ store <8 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { -+; CHECK-LABEL: insert_4xi32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 -+ store <4 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { -+; CHECK-LABEL: insert_2xi64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 -+ store <2 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { -+; CHECK-LABEL: insert_4xfloat: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.s $a2, $fa0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %v_new = insertelement <4 x float> %v, float %ins, i32 1 -+ store <4 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { -+; CHECK-LABEL: insert_2xdouble: -+; CHECK: # %bb.0: -+; CHECK-NEXT: movfr2gr.d $a2, $fa0 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %v_new = insertelement <2 x double> %v, double %ins, i32 1 -+ store <2 x double> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_16xi8_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 -+; CHECK-NEXT: st.b $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <16 x i8>, ptr %src -+ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx -+ store <16 x i8> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_8xi16_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 -+; CHECK-NEXT: st.h $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <8 x i16>, ptr %src -+ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx -+ store <8 x i16> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xi32_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 -+; CHECK-NEXT: st.w $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx -+ store <4 x i32> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_2xi64_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vst $vr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 -+; CHECK-NEXT: st.d $a2, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x i64>, ptr %src -+ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx -+ store <2 x i64> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_4xfloat_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr1, $a0, 0 -+; CHECK-NEXT: vst $vr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 -+; CHECK-NEXT: fst.s $fa0, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x float>, ptr %src -+ %v_new = insertelement <4 x float> %v, float %ins, i32 %idx -+ store <4 x float> %v_new, ptr %dst -+ ret void -+} -+ -+define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { -+; CHECK-LABEL: insert_2xdouble_idx: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr1, $a0, 0 -+; CHECK-NEXT: vst $vr1, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 -+; CHECK-NEXT: fst.d $fa0, $a0, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <2 x double>, ptr %src -+ %v_new = insertelement <2 x double> %v, double %ins, i32 %idx -+ store <2 x double> %v_new, ptr %dst -+ ret void -+} --- -2.20.1 - - -From 8c15dfb5e6c74537f5748936702e4d077d09815d Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 25 Oct 2023 17:00:32 +0800 -Subject: [PATCH 16/35] [LoongArch] Custom lowering `ISD::BUILD_VECTOR` - -(cherry picked from commit add224c0a094d20389d3659f7b6e496df461a976) ---- - .../LoongArch/LoongArchISelDAGToDAG.cpp | 52 +- - .../LoongArch/LoongArchISelLowering.cpp | 102 +++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 13 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 12 +- - .../CodeGen/LoongArch/lasx/build-vector.ll | 551 ++++++++++++++++++ - .../CodeGen/LoongArch/lsx/build-vector.ll | 376 ++++++++++++ - .../LoongArch/lsx/ir-instruction/mul.ll | 28 +- - 8 files changed, 1112 insertions(+), 23 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/build-vector.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/build-vector.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -index f55184019988..01b2f720f902 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { - return; - } - case ISD::BITCAST: { -- if (VT.is128BitVector() || VT.is512BitVector()) { -+ if (VT.is128BitVector() || VT.is256BitVector()) { - ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); - CurDAG->RemoveDeadNode(Node); - return; - } - break; - } -+ case ISD::BUILD_VECTOR: { -+ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of -+ // 128/256-bit when LSX/LASX is enabled. -+ BuildVectorSDNode *BVN = cast(Node); -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ unsigned Op; -+ EVT ViaVecTy; -+ bool Is128Vec = BVN->getValueType(0).is128BitVector(); -+ bool Is256Vec = BVN->getValueType(0).is256BitVector(); -+ -+ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec)) -+ break; -+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, -+ HasAnyUndefs, 8)) -+ break; -+ -+ switch (SplatBitSize) { -+ default: -+ break; -+ case 8: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B; -+ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8; -+ break; -+ case 16: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H; -+ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16; -+ break; -+ case 32: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W; -+ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32; -+ break; -+ case 64: -+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D; -+ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64; -+ break; -+ } -+ -+ SDNode *Res; -+ // If we have a signed 10 bit integer, we can splat it directly. -+ if (SplatValue.isSignedIntN(10)) { -+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, -+ ViaVecTy.getVectorElementType()); -+ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm); -+ ReplaceNode(Node, Res); -+ return; -+ } -+ break; -+ } - } - - // Select the default instruction. -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 492339ce2151..1b60bfc3bddb 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -234,11 +234,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BITCAST, VT, Legal); - setOperationAction(ISD::UNDEF, VT, Legal); - -- // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it -- // will be `Custom` handled in the future. -- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -265,10 +263,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BITCAST, VT, Legal); - setOperationAction(ISD::UNDEF, VT, Legal); - -- // FIXME: Same as above. -- setOperationAction(ISD::BUILD_VECTOR, VT, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); -@@ -371,10 +368,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerWRITE_REGISTER(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return lowerINSERT_VECTOR_ELT(Op, DAG); -+ case ISD::BUILD_VECTOR: -+ return lowerBUILD_VECTOR(Op, DAG); - } - return SDValue(); - } - -+static bool isConstantOrUndef(const SDValue Op) { -+ if (Op->isUndef()) -+ return true; -+ if (isa(Op)) -+ return true; -+ if (isa(Op)) -+ return true; -+ return false; -+} -+ -+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { -+ for (unsigned i = 0; i < Op->getNumOperands(); ++i) -+ if (isConstantOrUndef(Op->getOperand(i))) -+ return true; -+ return false; -+} -+ -+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, -+ SelectionDAG &DAG) const { -+ BuildVectorSDNode *Node = cast(Op); -+ EVT ResTy = Op->getValueType(0); -+ SDLoc DL(Op); -+ APInt SplatValue, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ bool Is128Vec = ResTy.is128BitVector(); -+ bool Is256Vec = ResTy.is256BitVector(); -+ -+ if ((!Subtarget.hasExtLSX() || !Is128Vec) && -+ (!Subtarget.hasExtLASX() || !Is256Vec)) -+ return SDValue(); -+ -+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, -+ /*MinSplatBits=*/8) && -+ SplatBitSize <= 64) { -+ // We can only cope with 8, 16, 32, or 64-bit elements. -+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && -+ SplatBitSize != 64) -+ return SDValue(); -+ -+ EVT ViaVecTy; -+ -+ switch (SplatBitSize) { -+ default: -+ return SDValue(); -+ case 8: -+ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; -+ break; -+ case 16: -+ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; -+ break; -+ case 32: -+ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; -+ break; -+ case 64: -+ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; -+ break; -+ } -+ -+ // SelectionDAG::getConstant will promote SplatValue appropriately. -+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); -+ -+ // Bitcast to the type we originally wanted. -+ if (ViaVecTy != ResTy) -+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); -+ -+ return Result; -+ } -+ -+ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) -+ return Op; -+ -+ if (!isConstantOrUndefBUILD_VECTOR(Node)) { -+ // Use INSERT_VECTOR_ELT operations rather than expand to stores. -+ // The resulting code is the same length as the expansion, but it doesn't -+ // use memory operations. -+ EVT ResTy = Node->getValueType(0); -+ -+ assert(ResTy.isVector()); -+ -+ unsigned NumElts = ResTy.getVectorNumElements(); -+ SDValue Vector = DAG.getUNDEF(ResTy); -+ for (unsigned i = 0; i < NumElts; ++i) { -+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, -+ Node->getOperand(i), -+ DAG.getConstant(i, DL, Subtarget.getGRLenVT())); -+ } -+ return Vector; -+ } -+ -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 29028ff963d0..111376306374 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -276,6 +276,7 @@ private: - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 475565db15c9..4487152fb42b 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -33,6 +33,13 @@ def lasxsplati32 - def lasxsplati64 - : PatFrag<(ops node:$e0), - (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplatf32 -+ : PatFrag<(ops node:$e0), -+ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, -+ node:$e0, node:$e0, node:$e0, node:$e0))>; -+def lasxsplatf64 -+ : PatFrag<(ops node:$e0), -+ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; - - //===----------------------------------------------------------------------===// - // Instruction class templates -@@ -1411,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - -+// XVREPL128VEI_{W/D} -+def : Pat<(lasxsplatf32 FPR32:$fj), -+ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+def : Pat<(lasxsplatf64 FPR64:$fj), -+ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ - // Loads/Stores - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { - defm : LdPat; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index d8fd132a1c59..deac5015882d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0), - def lsxsplati32 : PatFrag<(ops node:$e0), - (v4i32 (build_vector node:$e0, node:$e0, - node:$e0, node:$e0))>; -- - def lsxsplati64 : PatFrag<(ops node:$e0), - (v2i64 (build_vector node:$e0, node:$e0))>; -+def lsxsplatf32 : PatFrag<(ops node:$e0), -+ (v4f32 (build_vector node:$e0, node:$e0, -+ node:$e0, node:$e0))>; -+def lsxsplatf64 : PatFrag<(ops node:$e0), -+ (v2f64 (build_vector node:$e0, node:$e0))>; - - def to_valid_timm : SDNodeXForm(N); -@@ -1498,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), - (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; - -+// VREPLVEI_{W/D} -+def : Pat<(lsxsplatf32 FPR32:$fj), -+ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+def : Pat<(lsxsplatf64 FPR64:$fj), -+ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ - // Loads/Stores - foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { - defm : LdPat; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -new file mode 100644 -index 000000000000..6824ab5cda8d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -@@ -0,0 +1,551 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { -+; CHECK-LABEL: buildvector_v32i8_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 -+ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -+ store <32 x i8> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { -+; CHECK-LABEL: buildvector_v16i16_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 -+ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -+ store <16 x i16> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { -+; CHECK-LABEL: buildvector_v8i32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 -+ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -+ store <8 x i32> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { -+; CHECK-LABEL: buildvector_v4i64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 -+ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -+ store <4 x i64> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { -+; CHECK-LABEL: buildvector_v8f32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 -+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x float> undef, float %a0, i8 0 -+ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer -+ store <8 x float> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { -+; CHECK-LABEL: buildvector_v4f64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x double> undef, double %a0, i8 0 -+ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer -+ store <4 x double> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v32i8_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.b $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <32 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i16_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.h $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.w $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvrepli.d $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu12i.w $a1, 260096 -+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4f64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v32i8_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <32 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i16_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4f64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { -+; CHECK-LABEL: buildvector_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -+; CHECK-NEXT: ld.b $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 -+; CHECK-NEXT: ld.b $a1, $sp, 8 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 -+; CHECK-NEXT: ld.b $a1, $sp, 16 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 -+; CHECK-NEXT: ld.b $a1, $sp, 24 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 -+; CHECK-NEXT: ld.b $a1, $sp, 32 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 -+; CHECK-NEXT: ld.b $a1, $sp, 40 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 -+; CHECK-NEXT: ld.b $a1, $sp, 48 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 -+; CHECK-NEXT: ld.b $a1, $sp, 56 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 -+; CHECK-NEXT: ld.b $a1, $sp, 64 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 -+; CHECK-NEXT: ld.b $a1, $sp, 72 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 80 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 88 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 96 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 104 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 112 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 120 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 128 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 136 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 144 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 152 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 160 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 168 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 176 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 184 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.b $a1, $sp, 192 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 -+ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 -+ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 -+ %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 -+ %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 -+ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 -+ %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 -+ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 -+ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 -+ %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 -+ %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 -+ %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 -+ %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 -+ %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 -+ %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 -+ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 -+ %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 -+ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 -+ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 -+ %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 -+ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 -+ %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 -+ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 -+ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 -+ %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 -+ %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 -+ %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 -+ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 -+ %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 -+ %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 -+ %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 -+ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 -+ store <32 x i8> %ins31, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { -+; CHECK-LABEL: buildvector_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -+; CHECK-NEXT: ld.h $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 -+; CHECK-NEXT: ld.h $a1, $sp, 8 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 16 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 24 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 32 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 40 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 48 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 56 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: ld.h $a1, $sp, 64 -+; CHECK-NEXT: xvori.b $xr1, $xr0, 0 -+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 -+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 -+ %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 -+ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 -+ %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 -+ %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 -+ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 -+ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 -+ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 -+ %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 -+ %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 -+ %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 -+ %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 -+ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 -+ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 -+ %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 -+ %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 -+ store <16 x i16> %ins15, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { -+; CHECK-LABEL: buildvector_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 -+; CHECK-NEXT: ld.w $a1, $sp, 0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 -+ %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 -+ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 -+ %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 -+ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 -+ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 -+ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 -+ %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 -+ store <8 x i32> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { -+; CHECK-LABEL: buildvector_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 -+ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 -+ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 -+ %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 -+ store <4 x i64> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { -+; CHECK-LABEL: buildvector_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.s $a1, $fa0 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.s $a1, $fa1 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 -+; CHECK-NEXT: movfr2gr.s $a1, $fa2 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.s $a1, $fa3 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 -+; CHECK-NEXT: movfr2gr.s $a1, $fa4 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 -+; CHECK-NEXT: movfr2gr.s $a1, $fa5 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 -+; CHECK-NEXT: movfr2gr.s $a1, $fa6 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 -+; CHECK-NEXT: movfr2gr.s $a1, $fa7 -+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x float> undef, float %a0, i32 0 -+ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 -+ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 -+ %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 -+ %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 -+ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 -+ %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 -+ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 -+ store <8 x float> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { -+; CHECK-LABEL: buildvector_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.d $a1, $fa0 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.d $a1, $fa1 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 -+; CHECK-NEXT: movfr2gr.d $a1, $fa2 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.d $a1, $fa3 -+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x double> undef, double %a0, i32 0 -+ %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 -+ %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 -+ %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 -+ store <4 x double> %ins3, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -new file mode 100644 -index 000000000000..3a74db5e1acb ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -@@ -0,0 +1,376 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { -+; CHECK-LABEL: buildvector_v16i8_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 -+ %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -+ store <16 x i8> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { -+; CHECK-LABEL: buildvector_v8i16_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 -+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -+ store <8 x i16> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { -+; CHECK-LABEL: buildvector_v4i32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 -+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -+ store <4 x i32> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { -+; CHECK-LABEL: buildvector_v2i64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 -+ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -+ store <2 x i64> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { -+; CHECK-LABEL: buildvector_v4f32_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <4 x float> undef, float %a0, i8 0 -+ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer -+ store <4 x float> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { -+; CHECK-LABEL: buildvector_v2f64_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %insert = insertelement <2 x double> undef, double %a0, i8 0 -+ %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer -+ store <2 x double> %splat, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i8_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.b $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i16_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.h $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.w $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2i64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vrepli.d $vr0, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu12i.w $a1, 260096 -+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f64_const_splat: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v16i8_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <16 x i8> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v8i16_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <8 x i16> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v4i32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x i32> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2i64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x i64> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f32_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f32_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <4 x float> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64_const(ptr %dst) nounwind { -+; CHECK-LABEL: buildvector_v2f64_const: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ store <2 x double> , ptr %dst -+ ret void -+} -+ -+define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { -+; CHECK-LABEL: buildvector_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -+; CHECK-NEXT: ld.b $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 -+; CHECK-NEXT: ld.b $a1, $sp, 8 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 -+; CHECK-NEXT: ld.b $a1, $sp, 16 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 -+; CHECK-NEXT: ld.b $a1, $sp, 24 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 -+; CHECK-NEXT: ld.b $a1, $sp, 32 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 -+; CHECK-NEXT: ld.b $a1, $sp, 40 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 -+; CHECK-NEXT: ld.b $a1, $sp, 48 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 -+; CHECK-NEXT: ld.b $a1, $sp, 56 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 -+; CHECK-NEXT: ld.b $a1, $sp, 64 -+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 -+ %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 -+ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 -+ %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 -+ %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 -+ %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 -+ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 -+ %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 -+ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 -+ %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 -+ %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 -+ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 -+ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 -+ %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 -+ %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 -+ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 -+ store <16 x i8> %ins15, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { -+; CHECK-LABEL: buildvector_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 -+; CHECK-NEXT: ld.h $a1, $sp, 0 -+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 -+ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 -+ %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 -+ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 -+ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 -+ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 -+ %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6 -+ %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 -+ store <8 x i16> %ins7, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { -+; CHECK-LABEL: buildvector_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 -+ %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 -+ %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 -+ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 -+ store <4 x i32> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { -+; CHECK-LABEL: buildvector_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 -+ %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 -+ store <2 x i64> %ins1, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { -+; CHECK-LABEL: buildvector_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.s $a1, $fa0 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.s $a1, $fa1 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 -+; CHECK-NEXT: movfr2gr.s $a1, $fa2 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 -+; CHECK-NEXT: movfr2gr.s $a1, $fa3 -+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <4 x float> undef, float %a0, i32 0 -+ %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 -+ %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 -+ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 -+ store <4 x float> %ins3, ptr %dst -+ ret void -+} -+ -+define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { -+; CHECK-LABEL: buildvector_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfr2gr.d $a1, $fa0 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -+; CHECK-NEXT: movfr2gr.d $a1, $fa1 -+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %ins0 = insertelement <2 x double> undef, double %a0, i32 0 -+ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 -+ store <2 x double> %ins1, ptr %dst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -index 5060240cd8b1..d0be9cb7e3c8 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll -@@ -180,10 +180,9 @@ entry: - define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v16i8_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.b $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.b $vr1, 17 -+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -196,10 +195,9 @@ entry: - define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v8i16_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 17 -+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -212,10 +210,9 @@ entry: - define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v4i32_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 17 -+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -228,10 +225,9 @@ entry: - define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { - ; CHECK-LABEL: mul_v2i64_17: - ; CHECK: # %bb.0: # %entry --; CHECK-NEXT: ori $a2, $zero, 17 --; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 --; CHECK-NEXT: vld $vr1, $a1, 0 --; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 17 -+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 - ; CHECK-NEXT: vst $vr0, $a0, 0 - ; CHECK-NEXT: ret - entry: --- -2.20.1 - - -From 62970fc545cedb4640ded25af832fd233c16dc85 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 14 Nov 2023 17:58:52 +0800 -Subject: [PATCH 17/35] [LoongArch] Add more and/or/xor patterns for vector - types - -(cherry picked from commit ca66df3b021017fedf08f0779f5bfc7898dbdd29) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 21 +-- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 21 +-- - .../LoongArch/lasx/ir-instruction/and.ll | 125 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/or.ll | 125 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/xor.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/and.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/or.ll | 125 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/xor.ll | 125 ++++++++++++++++++ - 8 files changed, 774 insertions(+), 18 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 4487152fb42b..a5652472481a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1184,10 +1184,6 @@ multiclass PatShiftXrUimm { - (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; - } - --class PatXrXrB -- : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), -- (Inst LASX256:$xj, LASX256:$xk)>; -- - let Predicates = [HasExtLASX] in { - - // XVADD_{B/H/W/D} -@@ -1235,13 +1231,20 @@ defm : PatXrXr; - defm : PatXrXrU; - - // XVAND_V --def : PatXrXrB; --// XVNOR_V --def : PatXrXrB; -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVAND_V LASX256:$xj, LASX256:$xk)>; -+// XVOR_V -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVOR_V LASX256:$xj, LASX256:$xk)>; - // XVXOR_V --def : PatXrXrB; -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)), -+ (XVXOR_V LASX256:$xj, LASX256:$xk)>; - // XVNOR_V --def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), -+foreach vt = [v32i8, v16i16, v8i32, v4i64] in -+def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), - (XVNOR_V LASX256:$xj, LASX256:$xk)>; - - // XVANDI_B -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index deac5015882d..5645ce51194a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1261,10 +1261,6 @@ multiclass PatShiftVrUimm { - (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; - } - --class PatVrVrB -- : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), -- (Inst LSX128:$vj, LSX128:$vk)>; -- - let Predicates = [HasExtLSX] in { - - // VADD_{B/H/W/D} -@@ -1312,13 +1308,20 @@ defm : PatVrVr; - defm : PatVrVrU; - - // VAND_V --def : PatVrVrB; --// VNOR_V --def : PatVrVrB; -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VAND_V LSX128:$vj, LSX128:$vk)>; -+// VOR_V -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VOR_V LSX128:$vj, LSX128:$vk)>; - // VXOR_V --def : PatVrVrB; -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)), -+ (VXOR_V LSX128:$vj, LSX128:$vk)>; - // VNOR_V --def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), -+foreach vt = [v16i8, v8i16, v4i32, v2i64] in -+def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), - (VNOR_V LSX128:$vj, LSX128:$vk)>; - - // VANDI_B -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll -new file mode 100644 -index 000000000000..98c87cadeeb5 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = and <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = and <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = and <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = and <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = and <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = and <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = and <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = and <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll -new file mode 100644 -index 000000000000..f37cbf1cefed ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = or <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = or <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = or <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = or <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @or_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvori.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = or <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = or <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = or <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = or <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll -new file mode 100644 -index 000000000000..c2fb1462b7a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = xor <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = xor <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = xor <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = xor <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = xor <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = xor <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = xor <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 31 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = xor <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll -new file mode 100644 -index 000000000000..523255159a81 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = and <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = and <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = and <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: and_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = and <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @and_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vandi.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = and <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = and <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = and <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @and_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: and_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = and <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll -new file mode 100644 -index 000000000000..f124512acce7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = or <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = or <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = or <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: or_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = or <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @or_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vori.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = or <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = or <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = or <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @or_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: or_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = or <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll -new file mode 100644 -index 000000000000..ce3e49c990ff ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll -@@ -0,0 +1,125 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v2 = xor <16 x i8> %v0, %v1 -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v2 = xor <8 x i16> %v0, %v1 -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v2 = xor <4 x i32> %v0, %v1 -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: xor_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v2 = xor <2 x i64> %v0, %v1 -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vxori.b $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = xor <16 x i8> %v0, -+ store <16 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = xor <8 x i16> %v0, -+ store <8 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.w $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = xor <4 x i32> %v0, -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @xor_u_v2i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: xor_u_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.d $vr1, 31 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = xor <2 x i64> %v0, -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From f33b8ed69368098a23f9f14a1d3b8d62aca8b48f Mon Sep 17 00:00:00 2001 -From: leecheechen -Date: Fri, 1 Dec 2023 13:14:11 +0800 -Subject: [PATCH 18/35] [LoongArch] Add some binary IR instructions testcases - for LASX (#74031) - -The IR instructions include: -- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv -- Bitwise Binary Operations: shl lshr ashr - -(cherry picked from commit dbbc7c31c8e55d72dc243b244e386a25132e7215) ---- - .../LoongArch/lasx/ir-instruction/add.ll | 122 +++++++++ - .../LoongArch/lasx/ir-instruction/ashr.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/fadd.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fdiv.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fmul.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/fsub.ll | 34 +++ - .../LoongArch/lasx/ir-instruction/lshr.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/mul.ll | 238 ++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/sdiv.ll | 134 ++++++++++ - .../LoongArch/lasx/ir-instruction/shl.ll | 178 +++++++++++++ - .../LoongArch/lasx/ir-instruction/sub.ll | 122 +++++++++ - .../LoongArch/lasx/ir-instruction/udiv.ll | 122 +++++++++ - 12 files changed, 1408 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll -new file mode 100644 -index 000000000000..8e4d0dc6f1c3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = add <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = add <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = add <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: add_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = add <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v32i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = add <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v16i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = add <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = add <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: add_v4i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = add <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll -new file mode 100644 -index 000000000000..fcbf0f1400fe ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = ashr <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = ashr <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = ashr <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: ashr_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = ashr <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = ashr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = ashr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = ashr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = ashr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = ashr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = ashr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = ashr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @ashr_v4i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: ashr_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = ashr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll -new file mode 100644 -index 000000000000..365bb305fc5a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fadd <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fadd_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fadd <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -new file mode 100644 -index 000000000000..284121a79a49 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fdiv <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fdiv_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfdiv.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fdiv <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll -new file mode 100644 -index 000000000000..a48dca8d2847 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fmul <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fmul_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fmul <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll -new file mode 100644 -index 000000000000..6164aa5a55c7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll -@@ -0,0 +1,34 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = fsub <8 x float> %v0, %v1 -+ store <8 x float> %v2, ptr %res -+ ret void -+} -+ -+define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: fsub_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = fsub <4 x double> %v0, %v1 -+ store <4 x double> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll -new file mode 100644 -index 000000000000..24be69d8032a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = lshr <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = lshr <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = lshr <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: lshr_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = lshr <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = lshr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = lshr <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = lshr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = lshr <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = lshr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = lshr <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = lshr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @lshr_v4i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: lshr_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = lshr <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll -new file mode 100644 -index 000000000000..dcb893caa255 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll -@@ -0,0 +1,238 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = mul <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = mul <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = mul <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mul_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = mul <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, %v0 -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, %v0 -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, %v0 -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_square_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x i64> %v0, %v0 -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v32i8_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.b $xr1, 17 -+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = mul <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v16i16_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, 17 -+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = mul <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v8i32_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.w $xr1, 17 -+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = mul <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: mul_v4i64_17: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.d $xr1, 17 -+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = mul <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll -new file mode 100644 -index 000000000000..e3635a5f14a2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll -@@ -0,0 +1,134 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = sdiv <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = sdiv <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = sdiv <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sdiv_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = sdiv <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7 -+; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5 -+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = sdiv <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15 -+; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13 -+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = sdiv <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31 -+; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29 -+; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = sdiv <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sdiv_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63 -+; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61 -+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = sdiv <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll -new file mode 100644 -index 000000000000..8a02c7e3ac97 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll -@@ -0,0 +1,178 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = shl <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = shl <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = shl <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: shl_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = shl <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v32i8_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = shl <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v32i8_7: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.b $xr0, $xr0, 7 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = shl <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i16_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = shl <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v16i16_15: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = shl <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i32_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = shl <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = shl <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i64_1: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = shl <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: shl_v4i64_63: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslli.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = shl <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll -new file mode 100644 -index 000000000000..bcfff1651477 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = sub <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = sub <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = sub <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: sub_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = sub <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v32i8_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = sub <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v16i16_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = sub <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v8i32_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = sub <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sub_v4i64_31: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = sub <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll -new file mode 100644 -index 000000000000..e78084c7186d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll -@@ -0,0 +1,122 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v2 = udiv <32 x i8> %v0, %v1 -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v2 = udiv <16 x i16> %v0, %v1 -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v2 = udiv <8 x i32> %v0, %v1 -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: udiv_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v2 = udiv <4 x i64> %v0, %v1 -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v32i8_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = udiv <32 x i8> %v0, -+ store <32 x i8> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v16i16_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = udiv <16 x i16> %v0, -+ store <16 x i16> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v8i32_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = udiv <8 x i32> %v0, -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: udiv_v4i64_8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = udiv <4 x i64> %v0, -+ store <4 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 1b20d45ced302fa921b54294758687bc2c1df220 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 2 Dec 2023 14:25:17 +0800 -Subject: [PATCH 19/35] [LoongArch] Override TargetLowering::isShuffleMaskLegal - - By default, `isShuffleMaskLegal` always returns true, which can result - in the expansion of `BUILD_VECTOR` into a `VECTOR_SHUFFLE` node in - certain situations. Subsequently, the `VECTOR_SHUFFLE` node is expanded - again into a `BUILD_VECTOR`, leading to an infinite loop. - To address this, we always return false, allowing the expansion of - `BUILD_VECTOR` through the stack. - -(cherry picked from commit 66a3e4fafb6eae19764f8a192ca3a116c0554211) ---- - .../LoongArch/LoongArchISelLowering.cpp | 10 +++++++++ - .../Target/LoongArch/LoongArchISelLowering.h | 5 +++++ - .../CodeGen/LoongArch/lsx/build-vector.ll | 22 +++++++++++++++++++ - 3 files changed, 37 insertions(+) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 1b60bfc3bddb..e45f21265d7b 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -239,6 +239,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, - Legal); -@@ -268,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); - setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, - Legal); -@@ -370,10 +372,18 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::BUILD_VECTOR: - return lowerBUILD_VECTOR(Op, DAG); -+ case ISD::VECTOR_SHUFFLE: -+ return lowerVECTOR_SHUFFLE(Op, DAG); - } - return SDValue(); - } - -+SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, -+ SelectionDAG &DAG) const { -+ // TODO: custom shuffle. -+ return SDValue(); -+} -+ - static bool isConstantOrUndef(const SDValue Op) { - if (Op->isUndef()) - return true; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 111376306374..2c35f9e5d378 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -230,6 +230,10 @@ public: - MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - unsigned *Fast = nullptr) const override; - -+ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { -+ return false; -+ } -+ - private: - /// Target-specific function used to lower LoongArch calling conventions. - typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, -@@ -277,6 +281,7 @@ private: - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; -diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -index 3a74db5e1acb..ed1f610a5fa6 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll -@@ -374,3 +374,25 @@ entry: - store <2 x double> %ins1, ptr %dst - ret void - } -+ -+;; BUILD_VECTOR through stack. -+;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. -+define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -+; CHECK-NEXT: st.d $a0, $sp, 0 -+; CHECK-NEXT: vld $vr0, $sp, 0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %v = load volatile <4 x i32>, ptr %src -+ %e = extractelement <4 x i32> %v, i32 1 -+ %z = zext i32 %e to i64 -+ %r = insertelement <2 x i64> undef, i64 %z, i32 0 -+ store <2 x i64> %r, ptr %dst -+ ret void -+} --- -2.20.1 - - -From aa1ff5f878a37004975a017d84b2e87df0ea8235 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Sat, 2 Dec 2023 16:24:33 +0800 -Subject: [PATCH 20/35] Reland "[LoongArch] Support CTLZ with lsx/lasx" - -This patch simultaneously adds tests for `CTPOP`. - -This relands 07cec73dcd095035257eec1f213d273b10988130 with fix tests. - -(cherry picked from commit a60a5421b60be1bce0272385fa16846ada5eed5e) ---- - .../LoongArch/LoongArchISelLowering.cpp | 13 +- - .../LoongArch/LoongArchLASXInstrInfo.td | 11 +- - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 11 +- - .../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 115 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 115 ++++++++++++++++++ - 5 files changed, 255 insertions(+), 10 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index e45f21265d7b..358263b1a258 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -247,7 +247,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - VT, Legal); - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -- setOperationAction(ISD::CTPOP, VT, Legal); -+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - VT, Legal); - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); -- setOperationAction(ISD::CTPOP, VT, Legal); -+ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -2800,6 +2800,15 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - case Intrinsic::loongarch_lasx_xvsrai_d: - return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), - lowerVectorSplatImm<6>(N, 2, DAG)); -+ case Intrinsic::loongarch_lsx_vclz_b: -+ case Intrinsic::loongarch_lsx_vclz_h: -+ case Intrinsic::loongarch_lsx_vclz_w: -+ case Intrinsic::loongarch_lsx_vclz_d: -+ case Intrinsic::loongarch_lasx_xvclz_b: -+ case Intrinsic::loongarch_lasx_xvclz_h: -+ case Intrinsic::loongarch_lasx_xvclz_w: -+ case Intrinsic::loongarch_lasx_xvclz_d: -+ return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); - case Intrinsic::loongarch_lsx_vpcnt_b: - case Intrinsic::loongarch_lsx_vpcnt_h: - case Intrinsic::loongarch_lsx_vpcnt_w: -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a5652472481a..960ac627578c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1273,6 +1273,9 @@ defm : PatXrXr; - defm : PatShiftXrXr; - defm : PatShiftXrUimm; - -+// XVCLZ_{B/H/W/D} -+defm : PatXr; -+ - // XVPCNT_{B/H/W/D} - defm : PatXr; - -@@ -1590,26 +1593,26 @@ foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", - // (LAInst vty:$xj)>; - foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", - "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", -- "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", -+ "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", - "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", - "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in - def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", -- "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", -+ "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", - "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", - "VEXT2XV_DU_HU", "XVREPLVE0_H"] in - def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", -- "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", -+ "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", - "XVFFINTL_D_W", "XVFFINTH_D_W", - "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in - def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; - foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", - "XVEXTL_Q_D", "XVEXTL_QU_DU", -- "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", -+ "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", - "XVREPLVE0_D"] in - def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), - (!cast(Inst) LASX256:$xj)>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5645ce51194a..3480ade9eebf 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1350,6 +1350,9 @@ defm : PatVrVr; - defm : PatShiftVrVr; - defm : PatShiftVrUimm; - -+// VCLZ_{B/H/W/D} -+defm : PatVr; -+ - // VPCNT_{B/H/W/D} - defm : PatVr; - -@@ -1674,21 +1677,21 @@ foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", - // (LAInst vty:$vj)>; - foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", - "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", -- "VCLO_B", "VCLZ_B"] in -+ "VCLO_B"] in - def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", -- "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in -+ "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in - def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", -- "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", -+ "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", - "VFFINTL_D_W", "VFFINTH_D_W"] in - def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", - "VEXTL_Q_D", "VEXTL_QU_DU", -- "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in -+ "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in - def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), - (!cast(Inst) LSX128:$vj)>; - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll -new file mode 100644 -index 000000000000..7786e399c95f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll -@@ -0,0 +1,115 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.b $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <32 x i8>, ptr %src -+ %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) -+ store <32 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.h $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i16>, ptr %src -+ %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v) -+ store <16 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i32>, ptr %src -+ %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) -+ store <8 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvpcnt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i64>, ptr %src -+ %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) -+ store <4 x i64> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.b $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <32 x i8>, ptr %src -+ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) -+ store <32 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.h $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i16>, ptr %src -+ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) -+ store <16 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i32>, ptr %src -+ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) -+ store <8 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvclz.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i64>, ptr %src -+ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) -+ store <4 x i64> %res, ptr %dst -+ ret void -+} -+ -+declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) -+declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) -+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) -+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) -+declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) -+declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) -+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) -+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll -new file mode 100644 -index 000000000000..5df553fba7ef ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll -@@ -0,0 +1,115 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.b $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i8>, ptr %src -+ %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) -+ store <16 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.h $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i16>, ptr %src -+ %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) -+ store <8 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i32>, ptr %src -+ %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) -+ store <4 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctpop_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vpcnt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <2 x i64>, ptr %src -+ %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) -+ store <2 x i64> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.b $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <16 x i8>, ptr %src -+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) -+ store <16 x i8> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.h $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <8 x i16>, ptr %src -+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) -+ store <8 x i16> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <4 x i32>, ptr %src -+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) -+ store <4 x i32> %res, ptr %dst -+ ret void -+} -+ -+define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { -+; CHECK-LABEL: ctlz_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a0, 0 -+; CHECK-NEXT: vclz.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a1, 0 -+; CHECK-NEXT: ret -+ %v = load <2 x i64>, ptr %src -+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false) -+ store <2 x i64> %res, ptr %dst -+ ret void -+} -+ -+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) -+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) -+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) -+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) -+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) -+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) -+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) -+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) --- -2.20.1 - - -From aa55afe1a5c74c325f009c58f48645107fd95e11 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 4 Dec 2023 10:44:39 +0800 -Subject: [PATCH 21/35] [LoongArch] Support MULHS/MULHU with lsx/lasx - -Mark MULHS/MULHU nodes as legal and adds the necessary patterns. - -(cherry picked from commit e9cd197d15300f186a5a32092103add65fbd3f50) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../LoongArch/LoongArchLASXInstrInfo.td | 4 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + - llvm/test/CodeGen/LoongArch/lasx/mulh.ll | 162 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/mulh.ll | 162 ++++++++++++++++++ - 5 files changed, 334 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/mulh.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/mulh.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 358263b1a258..3d8d6898a4d5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -248,6 +248,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); -+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -@@ -278,6 +279,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); -+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 960ac627578c..240f28b0dc5a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1217,6 +1217,10 @@ defm : PatXrUimm5; - // XVMUL_{B/H/W/D} - defm : PatXrXr; - -+// XVMUH_{B/H/W/D}[U] -+defm : PatXrXr; -+defm : PatXrXrU; -+ - // XVMADD_{B/H/W/D} - defm : PatXrXrXr; - // XVMSUB_{B/H/W/D} -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 3480ade9eebf..fb4726c530b5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1294,6 +1294,10 @@ defm : PatVrUimm5; - // VMUL_{B/H/W/D} - defm : PatVrVr; - -+// VMUH_{B/H/W/D}[U] -+defm : PatVrVr; -+defm : PatVrVrU; -+ - // VMADD_{B/H/W/D} - defm : PatVrVrVr; - // VMSUB_{B/H/W/D} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll -new file mode 100644 -index 000000000000..aac711a4a371 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll -@@ -0,0 +1,162 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v0s = sext <32 x i8> %v0 to <32 x i16> -+ %v1s = sext <32 x i8> %v1 to <32 x i16> -+ %m = mul <32 x i16> %v0s, %v1s -+ %s = ashr <32 x i16> %m, -+ %v2 = trunc <32 x i16> %s to <32 x i8> -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v32i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %v0z = zext <32 x i8> %v0 to <32 x i16> -+ %v1z = zext <32 x i8> %v1 to <32 x i16> -+ %m = mul <32 x i16> %v0z, %v1z -+ %s = lshr <32 x i16> %m, -+ %v2 = trunc <32 x i16> %s to <32 x i8> -+ store <32 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v0s = sext <16 x i16> %v0 to <16 x i32> -+ %v1s = sext <16 x i16> %v1 to <16 x i32> -+ %m = mul <16 x i32> %v0s, %v1s -+ %s = ashr <16 x i32> %m, -+ %v2 = trunc <16 x i32> %s to <16 x i16> -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v16i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %v0z = zext <16 x i16> %v0 to <16 x i32> -+ %v1z = zext <16 x i16> %v1 to <16 x i32> -+ %m = mul <16 x i32> %v0z, %v1z -+ %s = lshr <16 x i32> %m, -+ %v2 = trunc <16 x i32> %s to <16 x i16> -+ store <16 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v0s = sext <8 x i32> %v0 to <8 x i64> -+ %v1s = sext <8 x i32> %v1 to <8 x i64> -+ %m = mul <8 x i64> %v0s, %v1s -+ %s = ashr <8 x i64> %m, -+ %v2 = trunc <8 x i64> %s to <8 x i32> -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v8i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %v0z = zext <8 x i32> %v0 to <8 x i64> -+ %v1z = zext <8 x i32> %v1 to <8 x i64> -+ %m = mul <8 x i64> %v0z, %v1z -+ %s = lshr <8 x i64> %m, -+ %v2 = trunc <8 x i64> %s to <8 x i32> -+ store <8 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v0s = sext <4 x i64> %v0 to <4 x i128> -+ %v1s = sext <4 x i64> %v1 to <4 x i128> -+ %m = mul <4 x i128> %v0s, %v1s -+ %s = ashr <4 x i128> %m, -+ %v2 = trunc <4 x i128> %s to <4 x i64> -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v4i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %v0z = zext <4 x i64> %v0 to <4 x i128> -+ %v1z = zext <4 x i64> %v1 to <4 x i128> -+ %m = mul <4 x i128> %v0z, %v1z -+ %s = lshr <4 x i128> %m, -+ %v2 = trunc <4 x i128> %s to <4 x i64> -+ store <4 x i64> %v2, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll -new file mode 100644 -index 000000000000..e1388f00e355 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll -@@ -0,0 +1,162 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v0s = sext <16 x i8> %v0 to <16 x i16> -+ %v1s = sext <16 x i8> %v1 to <16 x i16> -+ %m = mul <16 x i16> %v0s, %v1s -+ %s = ashr <16 x i16> %m, -+ %v2 = trunc <16 x i16> %s to <16 x i8> -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v16i8: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %v0z = zext <16 x i8> %v0 to <16 x i16> -+ %v1z = zext <16 x i8> %v1 to <16 x i16> -+ %m = mul <16 x i16> %v0z, %v1z -+ %s = lshr <16 x i16> %m, -+ %v2 = trunc <16 x i16> %s to <16 x i8> -+ store <16 x i8> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v0s = sext <8 x i16> %v0 to <8 x i32> -+ %v1s = sext <8 x i16> %v1 to <8 x i32> -+ %m = mul <8 x i32> %v0s, %v1s -+ %s = ashr <8 x i32> %m, -+ %v2 = trunc <8 x i32> %s to <8 x i16> -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v8i16: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %v0z = zext <8 x i16> %v0 to <8 x i32> -+ %v1z = zext <8 x i16> %v1 to <8 x i32> -+ %m = mul <8 x i32> %v0z, %v1z -+ %s = lshr <8 x i32> %m, -+ %v2 = trunc <8 x i32> %s to <8 x i16> -+ store <8 x i16> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v0s = sext <4 x i32> %v0 to <4 x i64> -+ %v1s = sext <4 x i32> %v1 to <4 x i64> -+ %m = mul <4 x i64> %v0s, %v1s -+ %s = ashr <4 x i64> %m, -+ %v2 = trunc <4 x i64> %s to <4 x i32> -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v4i32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %v0z = zext <4 x i32> %v0 to <4 x i64> -+ %v1z = zext <4 x i32> %v1 to <4 x i64> -+ %m = mul <4 x i64> %v0z, %v1z -+ %s = lshr <4 x i64> %m, -+ %v2 = trunc <4 x i64> %s to <4 x i32> -+ store <4 x i32> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhs_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v0s = sext <2 x i64> %v0 to <2 x i128> -+ %v1s = sext <2 x i64> %v1 to <2 x i128> -+ %m = mul <2 x i128> %v0s, %v1s -+ %s = ashr <2 x i128> %m, -+ %v2 = trunc <2 x i128> %s to <2 x i64> -+ store <2 x i64> %v2, ptr %res -+ ret void -+} -+ -+define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: mulhu_v2i64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %v0z = zext <2 x i64> %v0 to <2 x i128> -+ %v1z = zext <2 x i64> %v1 to <2 x i128> -+ %m = mul <2 x i128> %v0z, %v1z -+ %s = lshr <2 x i128> %m, -+ %v2 = trunc <2 x i128> %s to <2 x i64> -+ store <2 x i64> %v2, ptr %res -+ ret void -+} --- -2.20.1 - - -From 7d2d996fdab4fa9279318174f5b8042cc7ace0a6 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 6 Dec 2023 16:43:38 +0800 -Subject: [PATCH 22/35] [LoongArch] Make ISD::VSELECT a legal operation with - lsx/lasx - -(cherry picked from commit de21308f78f3b0f0910638dbdac90967150d19f0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 ++ - .../LoongArch/LoongArchLASXInstrInfo.td | 8 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 ++ - llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 86 +++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 86 +++++++++++++++++++ - 5 files changed, 193 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vselect.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vselect.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3d8d6898a4d5..229251987ae4 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -237,6 +237,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); -+ -+ setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); -@@ -268,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); -+ -+ setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); -@@ -305,6 +309,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setStackPointerRegisterToSaveRestore(LoongArch::R3); - - setBooleanContents(ZeroOrOneBooleanContent); -+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 240f28b0dc5a..0bd8db1bfdf0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), - def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), - (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; - -+// vselect -+def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -+ (v32i8 (SplatPat_uimm8 uimm8:$imm)))), -+ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; -+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in -+ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), -+ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index fb4726c530b5..5800ff6f6266 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), - def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), - (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; - -+// vselect -+def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, -+ (v16i8 (SplatPat_uimm8 uimm8:$imm)))), -+ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; -+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in -+ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), -+ (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -new file mode 100644 -index 000000000000..24f4bcf752d3 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: select_v32i8_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvrepli.h $xr1, -256 -+; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> -+ store <32 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v32i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvrepli.h $xr2, -256 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> %v1 -+ store <32 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v16i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: lu12i.w $a1, -16 -+; CHECK-NEXT: lu32i.d $a1, 0 -+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %sel = select <16 x i1> , <16 x i16> %v0, <16 x i16> %v1 -+ store <16 x i16> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: ori $a1, $zero, 0 -+; CHECK-NEXT: lu32i.d $a1, -1 -+; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %sel = select <8 x i1> , <8 x i32> %v0, <8 x i32> %v1 -+ store <8 x i32> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) -+; CHECK-NEXT: xvld $xr0, $a3, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvld $xr2, $a2, 0 -+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %sel = select <4 x i1> , <4 x i64> %v0, <4 x i64> %v1 -+ store <4 x i64> %sel, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -new file mode 100644 -index 000000000000..00e3d9313f13 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -@@ -0,0 +1,86 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: select_v16i8_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vrepli.h $vr1, -256 -+; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> -+ store <16 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v16i8: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vrepli.h $vr2, -256 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> %v1 -+ store <16 x i8> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v8i16: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: lu12i.w $a1, -16 -+; CHECK-NEXT: lu32i.d $a1, 0 -+; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %sel = select <8 x i1> , <8 x i16> %v0, <8 x i16> %v1 -+ store <8 x i16> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: ori $a1, $zero, 0 -+; CHECK-NEXT: lu32i.d $a1, -1 -+; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 -+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %sel = select <4 x i1> , <4 x i32> %v0, <4 x i32> %v1 -+ store <4 x i32> %sel, ptr %res -+ ret void -+} -+ -+define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: select_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) -+; CHECK-NEXT: vld $vr0, $a3, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vld $vr2, $a2, 0 -+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %sel = select <2 x i1> , <2 x i64> %v0, <2 x i64> %v1 -+ store <2 x i64> %sel, ptr %res -+ ret void -+} --- -2.20.1 - - -From 051e8cc8c17b13c4cb5ccd81038a305580fe3228 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 7 Dec 2023 20:11:43 +0800 -Subject: [PATCH 23/35] [LoongArch] Add codegen support for icmp/fcmp with - lsx/lasx fetaures (#74700) - -Mark ISD::SETCC node as legal, and add handling for the vector types -condition codes. - -(cherry picked from commit 9ff7d0ebeb54347f9006405a6d08ed2b713bc411) ---- - .../LoongArch/LoongArchISelLowering.cpp | 14 + - .../LoongArch/LoongArchLASXInstrInfo.td | 95 ++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 95 ++ - .../LoongArch/lasx/ir-instruction/fcmp.ll | 692 +++++++++++++ - .../LoongArch/lasx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fcmp.ll | 692 +++++++++++++ - .../LoongArch/lsx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ - 7 files changed, 3466 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 229251987ae4..3d5ae6d3deda 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - -+ setOperationAction(ISD::SETCC, VT, Legal); - setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { -@@ -251,11 +252,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); -+ setCondCodeAction( -+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, -+ Expand); - } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, -+ ISD::SETUGE, ISD::SETUGT}, -+ VT, Expand); - } - } - -@@ -271,6 +278,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - -+ setOperationAction(ISD::SETCC, VT, Legal); - setOperationAction(ISD::VSELECT, VT, Legal); - } - for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { -@@ -284,11 +292,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); - setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); - setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); -+ setCondCodeAction( -+ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, -+ Expand); - } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, -+ ISD::SETUGE, ISD::SETUGT}, -+ VT, Expand); - } - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 0bd8db1bfdf0..a9bf65c6840d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1184,6 +1184,65 @@ multiclass PatShiftXrUimm { - (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; - } - -+multiclass PatCCXrSimm5 { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), -+ (v32i8 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), -+ (v16i16 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), -+ (v8i32 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), -+ (v4i64 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; -+} -+ -+multiclass PatCCXrUimm5 { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), -+ (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), -+ (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), -+ (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), -+ (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; -+} -+ -+multiclass PatCCXrXr { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), -+ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), -+ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), -+ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatCCXrXrU { -+ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), -+ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), -+ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), -+ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), -+ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; -+} -+ -+multiclass PatCCXrXrF { -+ def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)), -+ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; -+ def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)), -+ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; -+} -+ - let Predicates = [HasExtLASX] in { - - // XVADD_{B/H/W/D} -@@ -1389,6 +1448,42 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// XVSEQ[I]_{B/H/W/D} -+defm : PatCCXrSimm5; -+defm : PatCCXrXr; -+ -+// XVSLE[I]_{B/H/W/D}[U] -+defm : PatCCXrSimm5; -+defm : PatCCXrUimm5; -+defm : PatCCXrXr; -+defm : PatCCXrXrU; -+ -+// XVSLT[I]_{B/H/W/D}[U] -+defm : PatCCXrSimm5; -+defm : PatCCXrUimm5; -+defm : PatCCXrXr; -+defm : PatCCXrXrU; -+ -+// XVFCMP.cond.{S/D} -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ -+defm : PatCCXrXrF; -+defm : PatCCXrXrF; -+ - // PseudoXVINSGR2VR_{B/H} - def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), - (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5800ff6f6266..ff21c6681271 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1261,6 +1261,65 @@ multiclass PatShiftVrUimm { - (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; - } - -+multiclass PatCCVrSimm5 { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), -+ (v16i8 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), -+ (v8i16 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), -+ (v4i32 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), -+ (v2i64 (SplatPat_simm5 simm5:$imm)), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; -+} -+ -+multiclass PatCCVrUimm5 { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), -+ (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), -+ (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), -+ (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), -+ (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)), -+ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; -+} -+ -+multiclass PatCCVrVr { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), -+ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), -+ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), -+ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatCCVrVrU { -+ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), -+ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), -+ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), -+ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), -+ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; -+} -+ -+multiclass PatCCVrVrF { -+ def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)), -+ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; -+ def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)), -+ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; -+} -+ - let Predicates = [HasExtLSX] in { - - // VADD_{B/H/W/D} -@@ -1466,6 +1525,42 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VSEQ[I]_{B/H/W/D} -+defm : PatCCVrSimm5; -+defm : PatCCVrVr; -+ -+// VSLE[I]_{B/H/W/D}[U] -+defm : PatCCVrSimm5; -+defm : PatCCVrUimm5; -+defm : PatCCVrVr; -+defm : PatCCVrVrU; -+ -+// VSLT[I]_{B/H/W/D}[U] -+defm : PatCCVrSimm5; -+defm : PatCCVrUimm5; -+defm : PatCCVrVr; -+defm : PatCCVrVrU; -+ -+// VFCMP.cond.{S/D} -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ -+defm : PatCCVrVrF; -+defm : PatCCVrVrF; -+ - // VINSGR2VR_{B/H/W/D} - def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), - (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll -new file mode 100644 -index 000000000000..ef67dbc100c0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll -@@ -0,0 +1,692 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; TREU -+define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_true: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvrepli.b $xr0, -1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp true <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+;; FALSE -+define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_false: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvrepli.b $xr0, 0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp false <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOEQ -+define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp oeq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp oeq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUEQ -+define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ueq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ueq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETEQ -+define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast oeq <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ueq <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLE -+define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ole <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ole <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ule <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ule <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast ole <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ule <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLT -+define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp olt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp olt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ult <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ult <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast olt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ult <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETONE -+define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp one <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp one <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUNE -+define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp une <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp une <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETNE -+define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast one <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast une <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETO -+define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ord <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ord <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUO -+define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp uno <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp uno <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGT -+define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ogt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ogt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp ugt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp ugt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast ogt <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast ugt <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGE -+define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp oge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp oge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp uge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp uge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8f32_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %cmp = fcmp fast oge <8 x float> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f64_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %cmp = fcmp fast uge <4 x double> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll -new file mode 100644 -index 000000000000..6693fe0f6ec7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll -@@ -0,0 +1,939 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; SETEQ -+define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp eq <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp eq <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp eq <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp eq <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp eq <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp eq <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp eq <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp eq <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp sle <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sle <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp sle <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sle <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp sle <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sle <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp sle <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sle <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp ule <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ule <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp ule <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ule <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp ule <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ule <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslei.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp ule <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ule <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.b $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp slt <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp slt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.h $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp slt <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp slt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.w $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp slt <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp slt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.d $xr0, $xr0, 15 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp slt <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp slt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v32i8_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %cmp = icmp ult <32 x i8> %v0, -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ult <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i16_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %cmp = icmp ult <16 x i16> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ult <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i32_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %cmp = icmp ult <8 x i32> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ult <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i64_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvslti.du $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %cmp = icmp ult <4 x i64> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ult <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETNE -+define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ne <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ne <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ne <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a2, 0 -+; CHECK-NEXT: xvld $xr1, $a1, 0 -+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvrepli.b $xr1, -1 -+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ne <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sge <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sge <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sge <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sge <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp uge <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp uge <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp uge <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp uge <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp sgt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp sgt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp sgt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp sgt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v32i8_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <32 x i8>, ptr %a0 -+ %v1 = load <32 x i8>, ptr %a1 -+ %cmp = icmp ugt <32 x i8> %v0, %v1 -+ %ext = sext <32 x i1> %cmp to <32 x i8> -+ store <32 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i16_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i16>, ptr %a0 -+ %v1 = load <16 x i16>, ptr %a1 -+ %cmp = icmp ugt <16 x i16> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i16> -+ store <16 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i32_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %a0 -+ %v1 = load <8 x i32>, ptr %a1 -+ %cmp = icmp ugt <8 x i32> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i32> -+ store <8 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i64_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvld $xr1, $a2, 0 -+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %a0 -+ %v1 = load <4 x i64>, ptr %a1 -+ %cmp = icmp ugt <4 x i64> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i64> -+ store <4 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll -new file mode 100644 -index 000000000000..53fbf0b2f86f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll -@@ -0,0 +1,692 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; TREU -+define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_true: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vrepli.b $vr0, -1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp true <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+;; FALSE -+define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_false: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vrepli.b $vr0, 0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp false <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOEQ -+define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp oeq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_oeq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp oeq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUEQ -+define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ueq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ueq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ueq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETEQ -+define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast oeq <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ueq <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLE -+define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ole <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ole: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ole <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ule <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ule <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast ole <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_le: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ule <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETOLT -+define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp olt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_olt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp olt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ult <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ult <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast olt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_lt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ult <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETONE -+define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp one <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_one: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp one <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUNE -+define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp une <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_une: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp une <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETNE -+define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast one <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast une <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETO -+define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ord <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ord: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ord <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETUO -+define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp uno <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_uno: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp uno <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGT -+define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ogt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ogt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ogt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp ugt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp ugt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast ogt <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_gt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast ugt <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETOGE -+define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp oge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_oge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp oge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp uge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp uge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4f32_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %cmp = fcmp fast oge <4 x float> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2f64_fcmp_ge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %cmp = fcmp fast uge <2 x double> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll -new file mode 100644 -index 000000000000..448f3fa6c6e0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll -@@ -0,0 +1,939 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; SETEQ -+define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp eq <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp eq <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp eq <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp eq <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp eq <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp eq <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_eq_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vseqi.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp eq <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_eq: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp eq <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLE -+define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp sle <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sle <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp sle <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sle <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp sle <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sle <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_sle_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp sle <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sle: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sle <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULE -+define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp ule <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ule <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp ule <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ule <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp ule <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ule <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_ule_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslei.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp ule <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ule: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ule <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETLT -+define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.b $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp slt <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp slt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.h $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp slt <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp slt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.w $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp slt <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp slt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_slt_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.d $vr0, $vr0, 15 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp slt <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_slt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp slt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; SETULT -+define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v16i8_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %cmp = icmp ult <16 x i8> %v0, -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ult <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v8i16_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %cmp = icmp ult <8 x i16> %v0, -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ult <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v4i32_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %cmp = icmp ult <4 x i32> %v0, -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ult <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: v2i64_icmp_ult_imm: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vslti.du $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %cmp = icmp ult <2 x i64> %v0, -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ult: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ult <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETNE -+define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vxori.b $vr0, $vr0, 255 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ne <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ne <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ne <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ne: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a2, 0 -+; CHECK-NEXT: vld $vr1, $a1, 0 -+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vrepli.b $vr1, -1 -+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ne <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGE -+define void @v16i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sge <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sge <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sge <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sge <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGE -+define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp uge <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp uge <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp uge <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_uge: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp uge <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETGT -+define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp sgt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp sgt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp sgt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_sgt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp sgt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} -+ -+;; Expand SETUGT -+define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v16i8_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <16 x i8>, ptr %a0 -+ %v1 = load <16 x i8>, ptr %a1 -+ %cmp = icmp ugt <16 x i8> %v0, %v1 -+ %ext = sext <16 x i1> %cmp to <16 x i8> -+ store <16 x i8> %ext, ptr %res -+ ret void -+} -+ -+define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v8i16_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i16>, ptr %a0 -+ %v1 = load <8 x i16>, ptr %a1 -+ %cmp = icmp ugt <8 x i16> %v0, %v1 -+ %ext = sext <8 x i1> %cmp to <8 x i16> -+ store <8 x i16> %ext, ptr %res -+ ret void -+} -+ -+define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v4i32_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %a0 -+ %v1 = load <4 x i32>, ptr %a1 -+ %cmp = icmp ugt <4 x i32> %v0, %v1 -+ %ext = sext <4 x i1> %cmp to <4 x i32> -+ store <4 x i32> %ext, ptr %res -+ ret void -+} -+ -+define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { -+; CHECK-LABEL: v2i64_icmp_ugt: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vld $vr1, $a2, 0 -+; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %a0 -+ %v1 = load <2 x i64>, ptr %a1 -+ %cmp = icmp ugt <2 x i64> %v0, %v1 -+ %ext = sext <2 x i1> %cmp to <2 x i64> -+ store <2 x i64> %ext, ptr %res -+ ret void -+} --- -2.20.1 - - -From 49444f4fbca6681e0fd404a19b562ccfcc140879 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 8 Dec 2023 14:16:26 +0800 -Subject: [PATCH 24/35] [LoongArch] Make ISD::FSQRT a legal operation with - lsx/lasx feature (#74795) - -And add some patterns: -1. (fdiv 1.0, vector) -2. (fdiv 1.0, (fsqrt vector)) - -(cherry picked from commit 9f70e708a7d3fce97d63b626520351501455fca0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 + - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 45 +++++++++++++ - llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll | 65 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/fdiv.ll | 29 +++++++++ - llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll | 65 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fdiv.ll | 29 +++++++++ - 7 files changed, 257 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 3d5ae6d3deda..8c54c7cf2cab 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -260,6 +260,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setOperationAction(ISD::FSQRT, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -@@ -300,6 +301,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); -+ setOperationAction(ISD::FSQRT, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index a9bf65c6840d..55b90f4450c0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1092,6 +1092,13 @@ multiclass PatXr { - (!cast(Inst#"_D") LASX256:$xj)>; - } - -+multiclass PatXrF { -+ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))), -+ (!cast(Inst#"_S") LASX256:$xj)>; -+ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))), -+ (!cast(Inst#"_D") LASX256:$xj)>; -+} -+ - multiclass PatXrXr { - def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), - (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; -@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// XVFSQRT_{S/D} -+defm : PatXrF; -+ -+// XVRECIP_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), -+ (XVFRECIP_S v8f32:$xj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj), -+ (XVFRECIP_D v4f64:$xj)>; -+ -+// XVFRSQRT_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)), -+ (XVFRSQRT_S v8f32:$xj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)), -+ (XVFRSQRT_D v4f64:$xj)>; -+ - // XVSEQ[I]_{B/H/W/D} - defm : PatCCXrSimm5; - defm : PatCCXrXr; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index ff21c6681271..8ad0c5904f25 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; - }]>; - -+def vsplatf32_fpimm_eq_1 -+ : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))), -+ (bitconvert (v8i32 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && -+ Imm == APFloat(+1.0f).bitcastToAPInt(); -+}]>; -+def vsplatf64_fpimm_eq_1 -+ : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))), -+ (bitconvert (v4i64 (build_vector)))], [{ -+ APInt Imm; -+ EVT EltTy = N->getValueType(0).getVectorElementType(); -+ N = N->getOperand(0).getNode(); -+ -+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && -+ Imm.getBitWidth() == EltTy.getSizeInBits() && -+ Imm == APFloat(+1.0).bitcastToAPInt(); -+}]>; -+ - def vsplati8imm7 : PatFrag<(ops node:$reg), - (and node:$reg, vsplati8_imm_eq_7)>; - def vsplati16imm15 : PatFrag<(ops node:$reg), -@@ -1173,6 +1196,13 @@ multiclass PatVr { - (!cast(Inst#"_D") LSX128:$vj)>; - } - -+multiclass PatVrF { -+ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))), -+ (!cast(Inst#"_S") LSX128:$vj)>; -+ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))), -+ (!cast(Inst#"_D") LSX128:$vj)>; -+} -+ - multiclass PatVrVr { - def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), - (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; -@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VFSQRT_{S/D} -+defm : PatVrF; -+ -+// VFRECIP_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), -+ (VFRECIP_S v4f32:$vj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj), -+ (VFRECIP_D v2f64:$vj)>; -+ -+// VFRSQRT_{S/D} -+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)), -+ (VFRSQRT_S v4f32:$vj)>; -+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)), -+ (VFRSQRT_D v2f64:$vj)>; -+ - // VSEQ[I]_{B/H/W/D} - defm : PatCCVrSimm5; - defm : PatCCVrVr; -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll -new file mode 100644 -index 000000000000..c4a881bdeae9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll -@@ -0,0 +1,65 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+;; fsqrt -+define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0, align 16 -+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) -+ store <8 x float> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0, align 16 -+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) -+ store <4 x double> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+;; 1.0 / (fsqrt vec) -+define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0, align 16 -+ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) -+ %div = fdiv <8 x float> , %sqrt -+ store <8 x float> %div, ptr %res, align 16 -+ ret void -+} -+ -+define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0, align 16 -+ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) -+ %div = fdiv <4 x double> , %sqrt -+ store <4 x double> %div, ptr %res, align 16 -+ ret void -+} -+ -+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) -+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -index 284121a79a49..6004565b0b78 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll -@@ -32,3 +32,32 @@ entry: - store <4 x double> %v2, ptr %res - ret void - } -+ -+;; 1.0 / vec -+define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrecip.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %div = fdiv <8 x float> , %v0 -+ store <8 x float> %div, ptr %res -+ ret void -+} -+ -+define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvfrecip.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %div = fdiv <4 x double> , %v0 -+ store <4 x double> %div, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll -new file mode 100644 -index 000000000000..a57bc1ca0e94 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll -@@ -0,0 +1,65 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+;; fsqrt -+define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfsqrt.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0, align 16 -+ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) -+ store <4 x float> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: sqrt_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfsqrt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0, align 16 -+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) -+ store <2 x double> %sqrt, ptr %res, align 16 -+ ret void -+} -+ -+;; 1.0 / (fsqrt vec) -+define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0, align 16 -+ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) -+ %div = fdiv <4 x float> , %sqrt -+ store <4 x float> %div, ptr %res, align 16 -+ ret void -+} -+ -+define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_div_sqrt_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0, align 16 -+ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) -+ %div = fdiv <2 x double> , %sqrt -+ store <2 x double> %div, ptr %res, align 16 -+ ret void -+} -+ -+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) -+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -index eb7c8bd9616e..5f1ee9e4d212 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll -@@ -32,3 +32,32 @@ entry: - store <2 x double> %v2, ptr %res - ret void - } -+ -+;; 1.0 / vec -+define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrecip.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %div = fdiv <4 x float> , %v0 -+ store <4 x float> %div, ptr %res -+ ret void -+} -+ -+define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: one_fdiv_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vfrecip.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %div = fdiv <2 x double> , %v0 -+ store <2 x double> %div, ptr %res -+ ret void -+} --- -2.20.1 - - -From 5942b745b9680284decadd33d2242ffd3d2d61c0 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 8 Dec 2023 14:21:10 +0800 -Subject: [PATCH 25/35] [LoongArch] Mark ISD::FNEG as legal - -(cherry picked from commit cdc37325669c0321328a7245083c427b229e79e9) ---- - .../LoongArch/LoongArchISelLowering.cpp | 2 ++ - .../LoongArch/LoongArchLASXInstrInfo.td | 4 +++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 +++ - .../LoongArch/lasx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ - 5 files changed, 68 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 8c54c7cf2cab..c7f4b1d24f07 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -261,6 +261,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); - setOperationAction(ISD::FSQRT, VT, Legal); -+ setOperationAction(ISD::FNEG, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -@@ -302,6 +303,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); - setOperationAction(ISD::FSQRT, VT, Legal); -+ setOperationAction(ISD::FNEG, VT, Legal); - setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, - ISD::SETUGE, ISD::SETUGT}, - VT, Expand); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 55b90f4450c0..8559baa0e525 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1605,6 +1605,10 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), - (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; - -+// fneg -+def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; -+def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 8ad0c5904f25..5947f241bb59 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1712,6 +1712,10 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), - (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; - -+// fneg -+def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; -+def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll -new file mode 100644 -index 000000000000..5eb468fc55a0 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll -@@ -0,0 +1,29 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v8f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = fneg <8 x float> %v0 -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+define void @fneg_v4f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v4f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = fneg <4 x double> %v0 -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll -new file mode 100644 -index 000000000000..795c1ac8b368 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll -@@ -0,0 +1,29 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v4f32: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = fneg <4 x float> %v0 -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+define void @fneg_v2f64(ptr %res, ptr %a0) nounwind { -+; CHECK-LABEL: fneg_v2f64: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = fneg <2 x double> %v0 -+ store <2 x double> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From b8eb506d34e303ddc42bc4e8f304a81ba320dff2 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 11 Dec 2023 10:37:22 +0800 -Subject: [PATCH 26/35] [LoongArch] Add codegen support for - [X]VF{MSUB/NMADD/NMSUB}.{S/D} instructions (#74819) - -This is similar to single and double-precision floating-point -instructions. - -(cherry picked from commit af999c4be9f5643724c6f379690ecee4346b2b48) ---- - .../LoongArch/LoongArchLASXInstrInfo.td | 26 + - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 26 + - llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll | 804 ++++++++++++++++++ - llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll | 804 ++++++++++++++++++ - 6 files changed, 3268 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 8559baa0e525..ec6983d0f487 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1455,6 +1455,32 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), - def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), - (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; - -+// XVFMSUB_{S/D} -+def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), -+ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), -+ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVFNMADD_{S/D} -+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)), -+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)), -+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), -+ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), -+ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ -+// XVFNMSUB_{S/D} -+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))), -+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))), -+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), -+ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; -+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), -+ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; -+ - // XVFSQRT_{S/D} - defm : PatXrF; - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5947f241bb59..e468176885d7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1555,6 +1555,32 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), - def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), - (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; - -+// VFMSUB_{S/D} -+def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), -+ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), -+ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VFNMADD_{S/D} -+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)), -+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)), -+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), -+ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), -+ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ -+// VFNMSUB_{S/D} -+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))), -+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))), -+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), -+ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; -+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), -+ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; -+ - // VFSQRT_{S/D} - defm : PatVrF; - -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll -new file mode 100644 -index 000000000000..af18c52b096c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %sub = fsub<4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ %negadd = fneg<4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg nsz<4 x double> %v0 -+ %negv2 = fneg nsz<4 x double> %v2 -+ %mul = fmul nsz<4 x double> %negv0, %v1 -+ %add = fadd nsz<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmadd.d is not emitted. -+define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg<4 x double> %v0 -+ %negv2 = fneg<4 x double> %v2 -+ %mul = fmul<4 x double> %negv0, %v1 -+ %add = fadd<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv2 = fneg<4 x double> %v2 -+ %mul = fmul<4 x double> %v0, %v1 -+ %add = fadd<4 x double> %mul, %negv2 -+ %neg = fneg<4 x double> %add -+ store <4 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg nsz<4 x double> %v0 -+ %mul = fmul nsz<4 x double> %negv0, %v1 -+ %add = fadd nsz<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmsub.d is not emitted. -+define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg<4 x double> %v0 -+ %mul = fmul<4 x double> %negv0, %v1 -+ %add = fadd<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %sub = fsub contract <4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x double> %v0 -+ %negv2 = fneg contract nsz<4 x double> %v2 -+ %mul = fmul contract nsz<4 x double> %negv0, %v1 -+ %add = fadd contract nsz<4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmadd.d is not emitted. -+define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract <4 x double> %v0 -+ %negv2 = fneg contract <4 x double> %v2 -+ %mul = fmul contract <4 x double> %negv0, %v1 -+ %add = fadd contract <4 x double> %mul, %negv2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv2 = fneg contract <4 x double> %v2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %negv2 -+ %neg = fneg contract <4 x double> %add -+ store <4 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x double> %v0 -+ %mul = fmul contract nsz<4 x double> %negv0, %v1 -+ %add = fadd contract nsz<4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that xvfnmsub.d is not emitted. -+define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %negv0 = fneg contract <4 x double> %v0 -+ %mul = fmul contract <4 x double> %negv0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ store <4 x double> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %sub = fsub contract <4 x double> %mul, %v2 -+ store <4 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %add = fadd contract <4 x double> %mul, %v2 -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x double>, ptr %a0 -+ %v1 = load <4 x double>, ptr %a1 -+ %v2 = load <4 x double>, ptr %a2 -+ %mul = fmul contract <4 x double> %v0, %v1 -+ %negv2 = fneg contract <4 x double> %v2 -+ %add = fadd contract <4 x double> %negv2, %mul -+ %negadd = fneg contract <4 x double> %add -+ store <4 x double> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll -new file mode 100644 -index 000000000000..b7b3cb3a2e66 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %sub = fsub<8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ %negadd = fneg<8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg nsz<8 x float> %v0 -+ %negv2 = fneg nsz<8 x float> %v2 -+ %mul = fmul nsz<8 x float> %negv0, %v1 -+ %add = fadd nsz<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmadd.s is not emitted. -+define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg<8 x float> %v0 -+ %negv2 = fneg<8 x float> %v2 -+ %mul = fmul<8 x float> %negv0, %v1 -+ %add = fadd<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv2 = fneg<8 x float> %v2 -+ %mul = fmul<8 x float> %v0, %v1 -+ %add = fadd<8 x float> %mul, %negv2 -+ %neg = fneg<8 x float> %add -+ store <8 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg nsz<8 x float> %v0 -+ %mul = fmul nsz<8 x float> %negv0, %v1 -+ %add = fadd nsz<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmsub.s is not emitted. -+define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 -+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg<8 x float> %v0 -+ %mul = fmul<8 x float> %negv0, %v1 -+ %add = fadd<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %sub = fsub contract <8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<8 x float> %v0 -+ %negv2 = fneg contract nsz<8 x float> %v2 -+ %mul = fmul contract nsz<8 x float> %negv0, %v1 -+ %add = fadd contract nsz<8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmadd.s is not emitted. -+define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract <8 x float> %v0 -+ %negv2 = fneg contract <8 x float> %v2 -+ %mul = fmul contract <8 x float> %negv0, %v1 -+ %add = fadd contract <8 x float> %mul, %negv2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv2 = fneg contract <8 x float> %v2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %negv2 -+ %neg = fneg contract <8 x float> %add -+ store <8 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<8 x float> %v0 -+ %mul = fmul contract nsz<8 x float> %negv0, %v1 -+ %add = fadd contract nsz<8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that fnmsub.s is not emitted. -+define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %negv0 = fneg contract <8 x float> %v0 -+ %mul = fmul contract <8 x float> %negv0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ store <8 x float> %add, ptr %res -+ ret void -+} -+ -+define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %sub = fsub contract <8 x float> %mul, %v2 -+ store <8 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %add = fadd contract <8 x float> %mul, %v2 -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: xvfnmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 -+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 -+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 -+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <8 x float>, ptr %a0 -+ %v1 = load <8 x float>, ptr %a1 -+ %v2 = load <8 x float>, ptr %a2 -+ %mul = fmul contract <8 x float> %v0, %v1 -+ %negv2 = fneg contract <8 x float> %v2 -+ %add = fadd contract <8 x float> %negv2, %mul -+ %negadd = fneg contract <8 x float> %add -+ store <8 x float> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll -new file mode 100644 -index 000000000000..8e0459b4afab ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %sub = fsub<2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ %negadd = fneg<2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg nsz<2 x double> %v0 -+ %negv2 = fneg nsz<2 x double> %v2 -+ %mul = fmul nsz<2 x double> %negv0, %v1 -+ %add = fadd nsz<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.d is not emitted. -+define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg<2 x double> %v0 -+ %negv2 = fneg<2 x double> %v2 -+ %mul = fmul<2 x double> %negv0, %v1 -+ %add = fadd<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv2 = fneg<2 x double> %v2 -+ %mul = fmul<2 x double> %v0, %v1 -+ %add = fadd<2 x double> %mul, %negv2 -+ %neg = fneg<2 x double> %add -+ store <2 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg nsz<2 x double> %v0 -+ %mul = fmul nsz<2 x double> %negv0, %v1 -+ %add = fadd nsz<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.d is not emitted. -+define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg<2 x double> %v0 -+ %mul = fmul<2 x double> %negv0, %v1 -+ %add = fadd<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %sub = fsub contract <2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<2 x double> %v0 -+ %negv2 = fneg contract nsz<2 x double> %v2 -+ %mul = fmul contract nsz<2 x double> %negv0, %v1 -+ %add = fadd contract nsz<2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.d is not emitted. -+define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract <2 x double> %v0 -+ %negv2 = fneg contract <2 x double> %v2 -+ %mul = fmul contract <2 x double> %negv0, %v1 -+ %add = fadd contract <2 x double> %mul, %negv2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv2 = fneg contract <2 x double> %v2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %negv2 -+ %neg = fneg contract <2 x double> %add -+ store <2 x double> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract nsz<2 x double> %v0 -+ %mul = fmul contract nsz<2 x double> %negv0, %v1 -+ %add = fadd contract nsz<2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.d is not emitted. -+define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %negv0 = fneg contract <2 x double> %v0 -+ %mul = fmul contract <2 x double> %negv0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ store <2 x double> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %sub = fsub contract <2 x double> %mul, %v2 -+ store <2 x double> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %add = fadd contract <2 x double> %mul, %v2 -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_d_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_d_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_d_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <2 x double>, ptr %a0 -+ %v1 = load <2 x double>, ptr %a1 -+ %v2 = load <2 x double>, ptr %a2 -+ %mul = fmul contract <2 x double> %v0, %v1 -+ %negv2 = fneg contract <2 x double> %v2 -+ %add = fadd contract <2 x double> %negv2, %mul -+ %negadd = fneg contract <2 x double> %add -+ store <2 x double> %negadd, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll -new file mode 100644 -index 000000000000..7efbd61c0c4f ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll -@@ -0,0 +1,804 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-ON -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ -+; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF -+ -+define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %sub = fsub<4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ %negadd = fneg<4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg nsz<4 x float> %v0 -+ %negv2 = fneg nsz<4 x float> %v2 -+ %mul = fmul nsz<4 x float> %negv0, %v1 -+ %add = fadd nsz<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.s is not emitted. -+define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg<4 x float> %v0 -+ %negv2 = fneg<4 x float> %v2 -+ %mul = fmul<4 x float> %negv0, %v1 -+ %add = fadd<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv2 = fneg<4 x float> %v2 -+ %mul = fmul<4 x float> %v0, %v1 -+ %add = fadd<4 x float> %mul, %negv2 -+ %neg = fneg<4 x float> %add -+ store <4 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg nsz<4 x float> %v0 -+ %mul = fmul nsz<4 x float> %negv0, %v1 -+ %add = fadd nsz<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.s is not emitted. -+define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 -+; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg<4 x float> %v0 -+ %mul = fmul<4 x float> %negv0, %v1 -+ %add = fadd<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %sub = fsub contract <4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x float> %v0 -+ %negv2 = fneg contract nsz<4 x float> %v2 -+ %mul = fmul contract nsz<4 x float> %negv0, %v1 -+ %add = fadd contract nsz<4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmadd.s is not emitted. -+define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract <4 x float> %v0 -+ %negv2 = fneg contract <4 x float> %v2 -+ %mul = fmul contract <4 x float> %negv0, %v1 -+ %add = fadd contract <4 x float> %mul, %negv2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv2 = fneg contract <4 x float> %v2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %negv2 -+ %neg = fneg contract <4 x float> %add -+ store <4 x float> %neg, ptr %res -+ ret void -+} -+ -+define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract nsz<4 x float> %v0 -+ %mul = fmul contract nsz<4 x float> %negv0, %v1 -+ %add = fadd contract nsz<4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+;; Check that vfnmsub.s is not emitted. -+define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %negv0 = fneg contract <4 x float> %v0 -+ %mul = fmul contract <4 x float> %negv0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ store <4 x float> %add, ptr %res -+ ret void -+} -+ -+define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %sub = fsub contract <4 x float> %mul, %v2 -+ store <4 x float> %sub, ptr %res -+ ret void -+} -+ -+define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmadd_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmadd_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmadd_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %add = fadd contract <4 x float> %mul, %v2 -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} -+ -+define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -+; CONTRACT-FAST-LABEL: vfnmsub_s_contract: -+; CONTRACT-FAST: # %bb.0: # %entry -+; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-FAST-NEXT: ret -+; -+; CONTRACT-ON-LABEL: vfnmsub_s_contract: -+; CONTRACT-ON: # %bb.0: # %entry -+; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-ON-NEXT: ret -+; -+; CONTRACT-OFF-LABEL: vfnmsub_s_contract: -+; CONTRACT-OFF: # %bb.0: # %entry -+; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 -+; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 -+; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 -+; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 -+; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 -+; CONTRACT-OFF-NEXT: ret -+entry: -+ %v0 = load <4 x float>, ptr %a0 -+ %v1 = load <4 x float>, ptr %a1 -+ %v2 = load <4 x float>, ptr %a2 -+ %mul = fmul contract <4 x float> %v0, %v1 -+ %negv2 = fneg contract <4 x float> %v2 -+ %add = fadd contract <4 x float> %negv2, %mul -+ %negadd = fneg contract <4 x float> %add -+ store <4 x float> %negadd, ptr %res -+ ret void -+} --- -2.20.1 - - -From 8aa8ce5abc7bf58ef9ae0460d1e9ed705895a887 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 25 Dec 2023 10:09:20 +0800 -Subject: [PATCH 27/35] [LoongArch] Fix LASX vector_extract codegen - -Custom lowering `ISD::EXTRACT_VECTOR_ELT` with lasx. - -(cherry picked from commit 47c88bcd5de91522241cca1aaa1b7762ceb01394) ---- - .../LoongArch/LoongArchISelLowering.cpp | 21 +++- - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../LoongArch/LoongArchLASXInstrInfo.td | 40 ++---- - .../lasx/ir-instruction/extractelement.ll | 114 ++++++++++++++---- - 4 files changed, 119 insertions(+), 57 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index c7f4b1d24f07..cf881ce720a6 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - setOperationAction(ISD::UNDEF, VT, Legal); - - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); -- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - - setOperationAction(ISD::SETCC, VT, Legal); -@@ -395,6 +395,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - return lowerWRITE_REGISTER(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return lowerINSERT_VECTOR_ELT(Op, DAG); -+ case ISD::EXTRACT_VECTOR_ELT: -+ return lowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::BUILD_VECTOR: - return lowerBUILD_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: -@@ -502,6 +504,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, - return SDValue(); - } - -+SDValue -+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ EVT VecTy = Op->getOperand(0)->getValueType(0); -+ SDValue Idx = Op->getOperand(1); -+ EVT EltTy = VecTy.getVectorElementType(); -+ unsigned NumElts = VecTy.getVectorNumElements(); -+ -+ if (isa(Idx) && -+ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || -+ EltTy == MVT::f64 || -+ cast(Idx)->getZExtValue() < NumElts / 2)) -+ return Op; -+ -+ return SDValue(); -+} -+ - SDValue - LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 2c35f9e5d378..6b5a851ec55d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -279,6 +279,7 @@ private: - SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index ec6983d0f487..9b7a34688811 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), - (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; - def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), - (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; --def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), -- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; --def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), -- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; --def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), -- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; --def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), -- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; -- --// Vector extraction with variable index. --def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), -- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, -- i64:$rk), -- sub_32)), -- GPR), (i64 24))>; --def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), -- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, -- i64:$rk), -- sub_32)), -- GPR), (i64 16))>; --def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), -- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), -- sub_32)), -- GPR)>; --def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), -- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), -- sub_64)), -- GPR)>; --def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), -- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; --def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), -- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; -+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), -+ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; -+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), -+ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; -+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), -+ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; -+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), -+ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; - - // vselect - def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -index 78f584cd09a8..02b76bf75b75 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll -@@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_8xi32: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 - ; CHECK-NEXT: st.w $a0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <8 x i32>, ptr %src -@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_4xi64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 - ; CHECK-NEXT: st.d $a0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <4 x i64>, ptr %src -@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_8xfloat: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: ori $a0, $zero, 7 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 -+; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -+; CHECK-NEXT: movgr2fr.w $fa0, $a0 - ; CHECK-NEXT: fst.s $fa0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <8 x float>, ptr %src -@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { - ; CHECK-LABEL: extract_4xdouble: - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: ori $a0, $zero, 3 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 -+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -+; CHECK-NEXT: movgr2fr.d $fa0, $a0 - ; CHECK-NEXT: fst.d $fa0, $a1, 0 - ; CHECK-NEXT: ret - %v = load volatile <4 x double>, ptr %src -@@ -84,12 +84,22 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { - define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_32xi8_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 --; CHECK-NEXT: srai.w $a0, $a0, 24 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 -+; CHECK-NEXT: ld.b $a0, $a0, 0 - ; CHECK-NEXT: st.b $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <32 x i8>, ptr %src - %e = extractelement <32 x i8> %v, i32 %idx -@@ -100,12 +110,22 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_16xi16_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 --; CHECK-NEXT: srai.w $a0, $a0, 16 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 -+; CHECK-NEXT: ld.h $a0, $a0, 0 - ; CHECK-NEXT: st.h $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <16 x i16>, ptr %src - %e = extractelement <16 x i16> %v, i32 %idx -@@ -116,11 +136,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_8xi32_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.s $a0, $fa0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: ld.w $a0, $a0, 0 - ; CHECK-NEXT: st.w $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <8 x i32>, ptr %src - %e = extractelement <8 x i32> %v, i32 %idx -@@ -131,11 +162,22 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_4xi64_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 --; CHECK-NEXT: movfr2gr.d $a0, $fa0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: ld.d $a0, $a0, 0 - ; CHECK-NEXT: st.d $a0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <4 x i64>, ptr %src - %e = extractelement <4 x i64> %v, i32 %idx -@@ -146,10 +188,22 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_8xfloat_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -+; CHECK-NEXT: fld.s $fa0, $a0, 0 - ; CHECK-NEXT: fst.s $fa0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <8 x float>, ptr %src - %e = extractelement <8 x float> %v, i32 %idx -@@ -160,10 +214,22 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { - ; CHECK-LABEL: extract_4xdouble_idx: - ; CHECK: # %bb.0: --; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 -+; CHECK-NEXT: addi.d $sp, $sp, -64 -+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $fp, $sp, 64 -+; CHECK-NEXT: srli.d $a3, $sp, 5 -+; CHECK-NEXT: slli.d $sp, $a3, 5 - ; CHECK-NEXT: xvld $xr0, $a0, 0 --; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 -+; CHECK-NEXT: xvst $xr0, $sp, 0 -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -+; CHECK-NEXT: fld.d $fa0, $a0, 0 - ; CHECK-NEXT: fst.d $fa0, $a1, 0 -+; CHECK-NEXT: addi.d $sp, $fp, -64 -+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 64 - ; CHECK-NEXT: ret - %v = load volatile <4 x double>, ptr %src - %e = extractelement <4 x double> %v, i32 %idx --- -2.20.1 - - -From 5953c8d6a82ac2ad2438de5dd46525a9f7a084fb Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 27 Dec 2023 16:31:49 +0800 -Subject: [PATCH 28/35] [LoongArch] Fix incorrect pattern XVREPL128VEI_{W/D} - instructions - -Remove the incorrect patterns for `XVREPL128VEI_{W/D}` instructions, -and add correct patterns for XVREPLVE0_{W/D} instructions - -(cherry picked from commit c7367f985e0d27aeb8bc993406d1b9f4ca307399) ---- - llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 6 +++--- - llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 4 ++-- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 9b7a34688811..059689cef840 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), - def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), - (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; - --// XVREPL128VEI_{W/D} -+// XVREPLVE0_{W/D} - def : Pat<(lasxsplatf32 FPR32:$fj), -- (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; -+ (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; - def : Pat<(lasxsplatf64 FPR64:$fj), -- (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; -+ (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; - - // Loads/Stores - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { -diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -index 6824ab5cda8d..ae6f31aaec64 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll -@@ -57,7 +57,7 @@ define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { - ; CHECK-LABEL: buildvector_v8f32_splat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 --; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 -+; CHECK-NEXT: xvreplve0.w $xr0, $xr0 - ; CHECK-NEXT: xvst $xr0, $a0, 0 - ; CHECK-NEXT: ret - entry: -@@ -71,7 +71,7 @@ define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { - ; CHECK-LABEL: buildvector_v4f64_splat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 --; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 -+; CHECK-NEXT: xvreplve0.d $xr0, $xr0 - ; CHECK-NEXT: xvst $xr0, $a0, 0 - ; CHECK-NEXT: ret - entry: --- -2.20.1 - - -From 7e21c962da87491bb438ea3906826875f53f2931 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Thu, 28 Dec 2023 20:56:32 +0800 -Subject: [PATCH 29/35] [LoongArch] Fix incorrect pattern [X]VBITSELI_B - instructions - -Adjusted the operand order of [X]VBITSELI_B to correctly match vselect. - -(cherry picked from commit da5378e87e11689d05a58198d6e15e9551916794) ---- - llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 4 ++-- - llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4 ++-- - llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 6 +++--- - llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 6 +++--- - 4 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index 059689cef840..b3c11bc5423d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1600,8 +1600,8 @@ def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), - (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; - - // vselect --def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, -- (v32i8 (SplatPat_uimm8 uimm8:$imm)))), -+def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), -+ LASX256:$xj)), - (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; - foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index e468176885d7..5569c2cd15b5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), - (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; - - // vselect --def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, -- (v16i8 (SplatPat_uimm8 uimm8:$imm)))), -+def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), -+ LSX128:$vj)), - (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; - foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), -diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -index 24f4bcf752d3..ec2fc28db33c 100644 ---- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll -@@ -6,11 +6,11 @@ define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { - ; CHECK: # %bb.0: - ; CHECK-NEXT: xvld $xr0, $a1, 0 - ; CHECK-NEXT: xvrepli.h $xr1, -256 --; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 --; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 -+; CHECK-NEXT: xvst $xr1, $a0, 0 - ; CHECK-NEXT: ret - %v0 = load <32 x i8>, ptr %a0 -- %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> -+ %sel = select <32 x i1> , <32 x i8> , <32 x i8> %v0 - store <32 x i8> %sel, ptr %res - ret void - } -diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -index 00e3d9313f13..746152f0f026 100644 ---- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll -@@ -6,11 +6,11 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { - ; CHECK: # %bb.0: - ; CHECK-NEXT: vld $vr0, $a1, 0 - ; CHECK-NEXT: vrepli.h $vr1, -256 --; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 --; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255 -+; CHECK-NEXT: vst $vr1, $a0, 0 - ; CHECK-NEXT: ret - %v0 = load <16 x i8>, ptr %a0 -- %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> -+ %sel = select <16 x i1> , <16 x i8> , <16 x i8> %v0 - store <16 x i8> %sel, ptr %res - ret void - } --- -2.20.1 - - -From 9aab6c004b73d1069444b17a9768310f288b3130 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 23 Jan 2024 09:06:35 +0800 -Subject: [PATCH 30/35] [LoongArch] Permit auto-vectorization using LSX/LASX - with `auto-vec` feature (#78943) - -With enough codegen complete, we can now correctly report the size of -vector registers for LSX/LASX, allowing auto vectorization (The -`auto-vec` feature needs to be enabled simultaneously). - -As described, the `auto-vec` feature is an experimental one. To ensure -that automatic vectorization is not enabled by default, because the -information provided by the current `TTI` cannot yield additional -benefits for automatic vectorization. - -(cherry picked from commit fcff4582f01db2f5a99e3acf452aec9f2d8a126a) ---- - llvm/lib/Target/LoongArch/LoongArch.td | 4 ++ - .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 + - .../LoongArchTargetTransformInfo.cpp | 18 +++++ - .../LoongArch/LoongArchTargetTransformInfo.h | 2 + - .../LoopVectorize/LoongArch/defaults.ll | 66 +++++++++++++++++++ - .../LoopVectorize/LoongArch/lit.local.cfg | 4 ++ - 6 files changed, 96 insertions(+) - create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll - create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg - -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 75b65fe69f26..2a4c991a43b0 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -105,6 +105,10 @@ def FeatureUAL - def FeatureRelax - : SubtargetFeature<"relax", "HasLinkerRelax", "true", - "Enable Linker relaxation">; -+// Experimental auto vectorization -+def FeatureAutoVec -+ : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", -+ "Experimental auto vectorization">; - - //===----------------------------------------------------------------------===// - // Registers, instruction descriptions ... -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -index 5c173675cca4..174e4cba8326 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { - bool HasLaLocalWithAbs = false; - bool HasUAL = false; - bool HasLinkerRelax = false; -+ bool HasExpAutoVec = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; -@@ -102,6 +103,7 @@ public: - bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } - bool hasUAL() const { return HasUAL; } - bool hasLinkerRelax() const { return HasLinkerRelax; } -+ bool hasExpAutoVec() const { return HasExpAutoVec; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -index a6de86eea116..04349aa52b54 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -@@ -19,4 +19,22 @@ using namespace llvm; - - #define DEBUG_TYPE "loongarchtti" - -+TypeSize LoongArchTTIImpl::getRegisterBitWidth( -+ TargetTransformInfo::RegisterKind K) const { -+ switch (K) { -+ case TargetTransformInfo::RGK_Scalar: -+ return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); -+ case TargetTransformInfo::RGK_FixedWidthVector: -+ if (ST->hasExtLASX() && ST->hasExpAutoVec()) -+ return TypeSize::getFixed(256); -+ if (ST->hasExtLSX() && ST->hasExpAutoVec()) -+ return TypeSize::getFixed(128); -+ return TypeSize::getFixed(0); -+ case TargetTransformInfo::RGK_ScalableVector: -+ return TypeSize::getScalable(0); -+ } -+ -+ llvm_unreachable("Unsupported register kind"); -+} -+ - // TODO: Implement more hooks to provide TTI machinery for LoongArch. -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -index 9e02f793ba8a..d296c9ed576f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h -@@ -39,6 +39,8 @@ public: - : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), - TLI(ST->getTargetLowering()) {} - -+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; -+ - // TODO: Implement more hooks to provide TTI machinery for LoongArch. - }; - -diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -new file mode 100644 -index 000000000000..a8ac2411dd82 ---- /dev/null -+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll -@@ -0,0 +1,66 @@ -+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -+; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s -+ -+;; This is a collection of tests whose only purpose is to show changes in the -+;; default configuration. Please keep these tests minimal - if you're testing -+;; functionality of some specific configuration, please place that in a -+;; seperate test file with a hard coded configuration (even if that -+;; configuration is the current default). -+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64" -+ -+define void @vector_add(ptr noalias nocapture %a, i64 %v) { -+; CHECK-LABEL: define void @vector_add -+; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] { -+; CHECK-NEXT: entry: -+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -+; CHECK: vector.ph: -+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 -+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -+; CHECK: vector.body: -+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] -+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 -+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -+; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8 -+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -+; CHECK: middle.block: -+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -+; CHECK: scalar.ph: -+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -+; CHECK-NEXT: br label [[FOR_BODY:%.*]] -+; CHECK: for.body: -+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -+; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -+; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -+; CHECK: for.end: -+; CHECK-NEXT: ret void -+; -+entry: -+ br label %for.body -+ -+for.body: -+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv -+ %elem = load i64, ptr %arrayidx -+ %add = add i64 %elem, %v -+ store i64 %add, ptr %arrayidx -+ %iv.next = add nuw nsw i64 %iv, 1 -+ %exitcond.not = icmp eq i64 %iv.next, 1024 -+ br i1 %exitcond.not, label %for.end, label %for.body -+ -+for.end: -+ ret void -+} -diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg -new file mode 100644 -index 000000000000..9570af17fe5f ---- /dev/null -+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg -@@ -0,0 +1,4 @@ -+config.suffixes = [".ll"] -+ -+if not "LoongArch" in config.root.targets: -+ config.unsupported = True --- -2.20.1 - - -From 61b8589c3c71026af28b6a71e0b5d4c41b7c78d1 Mon Sep 17 00:00:00 2001 -From: yjijd -Date: Tue, 23 Jan 2024 15:16:23 +0800 -Subject: [PATCH 31/35] [CodeGen][LoongArch] Set SINT_TO_FP/UINT_TO_FP to legal - for vector types (#78924) - -Support the following conversions: -v4i32->v4f32, v2i64->v2f64(LSX) -v8i32->v8f32, v4i64->v4f64(LASX) -v4i32->v4f64, v4i64->v4f32(LASX) - -(cherry picked from commit f799f936929c232a16abc7c520a10fecadbf05f9) ---- - .../LoongArch/LoongArchISelLowering.cpp | 4 ++ - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ - .../LoongArch/lasx/ir-instruction/sitofp.ll | 57 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/uitofp.ll | 57 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/sitofp.ll | 28 +++++++++ - .../LoongArch/lsx/ir-instruction/uitofp.ll | 28 +++++++++ - 7 files changed, 204 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index cf881ce720a6..7a360b42e15d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -256,6 +256,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -+ {MVT::v4i32, MVT::v2i64}, Legal); - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -@@ -298,6 +300,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -+ {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index b3c11bc5423d..b3e74b480922 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1611,6 +1611,28 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in - def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; - def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; - -+// XVFFINT_{S_W/D_L} -+def : Pat<(v8f32 (sint_to_fp v8i32:$vj)), (XVFFINT_S_W v8i32:$vj)>; -+def : Pat<(v4f64 (sint_to_fp v4i64:$vj)), (XVFFINT_D_L v4i64:$vj)>; -+def : Pat<(v4f64 (sint_to_fp v4i32:$vj)), -+ (XVFFINT_D_L (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, -+ sub_128)))>; -+def : Pat<(v4f32 (sint_to_fp v4i64:$vj)), -+ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_L v4i64:$vj), 238), -+ (XVFFINT_D_L v4i64:$vj)), -+ sub_128)>; -+ -+// XVFFINT_{S_WU/D_LU} -+def : Pat<(v8f32 (uint_to_fp v8i32:$vj)), (XVFFINT_S_WU v8i32:$vj)>; -+def : Pat<(v4f64 (uint_to_fp v4i64:$vj)), (XVFFINT_D_LU v4i64:$vj)>; -+def : Pat<(v4f64 (uint_to_fp v4i32:$vj)), -+ (XVFFINT_D_LU (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, -+ sub_128)))>; -+def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), -+ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_LU v4i64:$vj), 238), -+ (XVFFINT_D_LU v4i64:$vj)), -+ sub_128)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 5569c2cd15b5..63eac4d1aeb7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1742,6 +1742,14 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; - def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; - -+// VFFINT_{S_W/D_L} -+def : Pat<(v4f32 (sint_to_fp v4i32:$vj)), (VFFINT_S_W v4i32:$vj)>; -+def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; -+ -+// VFFINT_{S_WU/D_LU} -+def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; -+def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll -new file mode 100644 -index 000000000000..208a758ea4e9 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v8i32_v8f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.s.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %in -+ %v1 = sitofp <8 x i32> %v0 to <8 x float> -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4f64_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = sitofp <4 x i64> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i64_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = sitofp <4 x i64> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i32_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: xvffint.d.l $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = sitofp <4 x i32> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll -new file mode 100644 -index 000000000000..70cf71c4cec2 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v8i32_v8f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x i32>, ptr %in -+ %v1 = uitofp <8 x i32> %v0 to <8 x float> -+ store <8 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4f64_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = uitofp <4 x i64> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i64_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i64>, ptr %in -+ %v1 = uitofp <4 x i64> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i32_v4f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -+; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = uitofp <4 x i32> %v0 to <4 x double> -+ store <4 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll -new file mode 100644 -index 000000000000..1e820a37a240 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v4i32_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.s.w $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = sitofp <4 x i32> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){ -+; CHECK-LABEL: sitofp_v2i64_v2f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.d.l $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %in -+ %v1 = sitofp <2 x i64> %v0 to <2 x double> -+ store <2 x double> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll -new file mode 100644 -index 000000000000..3d4913f12e57 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v4i32_v4f32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.s.wu $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x i32>, ptr %in -+ %v1 = uitofp <4 x i32> %v0 to <4 x float> -+ store <4 x float> %v1, ptr %res -+ ret void -+} -+ -+define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){ -+; CHECK-LABEL: uitofp_v2i64_v2f64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vffint.d.lu $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x i64>, ptr %in -+ %v1 = uitofp <2 x i64> %v0 to <2 x double> -+ store <2 x double> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 0bf1418c5f46ca74dfc8903757b3bb14e0760633 Mon Sep 17 00:00:00 2001 -From: yjijd -Date: Tue, 23 Jan 2024 15:57:06 +0800 -Subject: [PATCH 32/35] [CodeGen][LoongArch] Set FP_TO_SINT/FP_TO_UINT to legal - for vector types (#79107) - -Support the following conversions: -v4f32->v4i32, v2f64->v2i64(LSX) -v8f32->v8i32, v4f64->v4i64(LASX) -v4f32->v4i64, v4f64->v4i32(LASX) - -(cherry picked from commit 44ba6ebc999d6e9b27bedfe04a993adfd204dc6a) ---- - .../LoongArch/LoongArchISelLowering.cpp | 12 ++-- - .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ - .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ - .../LoongArch/lasx/ir-instruction/fptosi.ll | 57 +++++++++++++++++++ - .../LoongArch/lasx/ir-instruction/fptoui.ll | 57 +++++++++++++++++++ - .../LoongArch/lsx/ir-instruction/fptosi.ll | 28 +++++++++ - .../LoongArch/lsx/ir-instruction/fptoui.ll | 28 +++++++++ - 7 files changed, 208 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll - create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 7a360b42e15d..f7eacd56c542 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -256,8 +256,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -- {MVT::v4i32, MVT::v2i64}, Legal); -+ for (MVT VT : {MVT::v4i32, MVT::v2i64}) { -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); -+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); -+ } - for (MVT VT : {MVT::v4f32, MVT::v2f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -@@ -300,8 +302,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, - Expand); - } -- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, -- {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); -+ for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { -+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); -+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); -+ } - for (MVT VT : {MVT::v8f32, MVT::v4f64}) { - setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); - setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); -diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -index b3e74b480922..492b62da6ce7 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td -@@ -1633,6 +1633,28 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), - (XVFFINT_D_LU v4i64:$vj)), - sub_128)>; - -+// XVFTINTRZ_{W_S/L_D} -+def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; -+def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; -+def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), -+ (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), -+ sub_128))>; -+def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), -+ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), -+ v4f64:$vj)), -+ sub_128)>; -+ -+// XVFTINTRZ_{W_SU/L_DU} -+def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; -+def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; -+def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), -+ (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), -+ sub_128))>; -+def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), -+ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), -+ v4f64:$vj)), -+ sub_128)>; -+ - } // Predicates = [HasExtLASX] - - /// Intrinsic pattern -diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -index 63eac4d1aeb7..99ac2f3c162f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td -@@ -1750,6 +1750,14 @@ def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; - def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; - def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; - -+// VFTINTRZ_{W_S/L_D} -+def : Pat<(v4i32 (fp_to_sint v4f32:$vj)), (VFTINTRZ_W_S v4f32:$vj)>; -+def : Pat<(v2i64 (fp_to_sint v2f64:$vj)), (VFTINTRZ_L_D v2f64:$vj)>; -+ -+// VFTINTRZ_{W_SU/L_DU} -+def : Pat<(v4i32 (fp_to_uint v4f32:$vj)), (VFTINTRZ_WU_S v4f32:$vj)>; -+def : Pat<(v2i64 (fp_to_uint v2f64:$vj)), (VFTINTRZ_LU_D v2f64:$vj)>; -+ - } // Predicates = [HasExtLSX] - - /// Intrinsic pattern -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll -new file mode 100644 -index 000000000000..0d9f57b57ffa ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v8f32_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %in -+ %v1 = fptosi <8 x float> %v0 to <8 x i32> -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f64_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptosi <4 x double> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f64_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptosi <4 x double> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f32_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptosi <4 x float> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll -new file mode 100644 -index 000000000000..27d70f33cd34 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll -@@ -0,0 +1,57 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s -+ -+define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v8f32_v8i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <8 x float>, ptr %in -+ %v1 = fptoui <8 x float> %v0 to <8 x i32> -+ store <8 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f64_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptoui <4 x double> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f64_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: xvld $xr0, $a1, 0 -+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x double>, ptr %in -+ %v1 = fptoui <4 x double> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f32_v4i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -+; CHECK-NEXT: xvst $xr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptoui <4 x float> %v0 to <4 x i64> -+ store <4 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll -new file mode 100644 -index 000000000000..c3008fe96e47 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v4f32_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptosi <4 x float> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptosi_v2f64_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %in -+ %v1 = fptosi <2 x double> %v0 to <2 x i64> -+ store <2 x i64> %v1, ptr %res -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll -new file mode 100644 -index 000000000000..f0aeb0bd14e7 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll -@@ -0,0 +1,28 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -+ -+define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v4f32_v4i32: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <4 x float>, ptr %in -+ %v1 = fptoui <4 x float> %v0 to <4 x i32> -+ store <4 x i32> %v1, ptr %res -+ ret void -+} -+ -+define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ -+; CHECK-LABEL: fptoui_v2f64_v2i64: -+; CHECK: # %bb.0: -+; CHECK-NEXT: vld $vr0, $a1, 0 -+; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 -+; CHECK-NEXT: vst $vr0, $a0, 0 -+; CHECK-NEXT: ret -+ %v0 = load <2 x double>, ptr %in -+ %v1 = fptoui <2 x double> %v0 to <2 x i64> -+ store <2 x i64> %v1, ptr %res -+ ret void -+} --- -2.20.1 - - -From 66224dcebf8b0cc0d32fa5c73fbb4bca0d885a7d Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 26 Jan 2024 10:24:07 +0800 -Subject: [PATCH 33/35] [LoongArch] Fixing the incorrect return value of - LoongArchTTIImpl::getRegisterBitWidth (#79441) - -When we do not enable vector features, we should return the default -value (`TargetTransformInfoImplBase::getRegisterBitWidth`) instead of -zero. - -This should fix the LoongArch [buildbot -breakage](https://lab.llvm.org/staging/#/builders/5/builds/486) from - -(cherry picked from commit 1e9924c1f248bbddcb95d82a59708d617297dad3) -(cherry picked from commit 900e7cbfdee09c94d022e4dae923b3c7827f95e3) ---- - .../Target/LoongArch/LoongArchTargetTransformInfo.cpp | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -index 04349aa52b54..d47dded9ea6e 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp -@@ -21,17 +21,20 @@ using namespace llvm; - - TypeSize LoongArchTTIImpl::getRegisterBitWidth( - TargetTransformInfo::RegisterKind K) const { -+ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); - switch (K) { - case TargetTransformInfo::RGK_Scalar: - return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); - case TargetTransformInfo::RGK_FixedWidthVector: -- if (ST->hasExtLASX() && ST->hasExpAutoVec()) -+ if (!ST->hasExpAutoVec()) -+ return DefSize; -+ if (ST->hasExtLASX()) - return TypeSize::getFixed(256); -- if (ST->hasExtLSX() && ST->hasExpAutoVec()) -+ if (ST->hasExtLSX()) - return TypeSize::getFixed(128); -- return TypeSize::getFixed(0); -+ [[fallthrough]]; - case TargetTransformInfo::RGK_ScalableVector: -- return TypeSize::getScalable(0); -+ return DefSize; - } - - llvm_unreachable("Unsupported register kind"); --- -2.20.1 - - -From fe278490b48572e5f5581f35d6b4195f35693b8c Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 9 Jan 2024 20:32:20 +0800 -Subject: [PATCH 34/35] [LoongArch] Pre-commit test for #76913. NFC - -This test will crash with expensive check. - -Crash message: -``` -*** Bad machine code: Using an undefined physical register *** -- function: main -- basic block: %bb.0 entry (0x20fee70) -- instruction: $r3 = frame-destroy ADDI_D $r22, -288 -- operand 1: $r22 -``` - -(cherry picked from commit f499472de3e1184b83fc6cd78bc244a55f2cac7d) ---- - .../LoongArch/can-not-realign-stack.ll | 39 +++++++++++++++++++ - 1 file changed, 39 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll - -diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -new file mode 100644 -index 000000000000..526821076498 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -@@ -0,0 +1,39 @@ -+; REQUIRES: expensive_checks -+; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s -+ -+; XFAIL: * -+ -+;; FIXME: This test will crash with expensive check. The subsequent patch will -+;; address and fix this issue. -+ -+%struct.S = type { [64 x i16] } -+ -+define dso_local noundef signext i32 @main() nounwind { -+entry: -+ %s = alloca %struct.S, align 2 -+ call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) -+ store <16 x i16> , ptr %s, align 2 -+ %0 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 -+ store <16 x i16> , ptr %0, align 2 -+ %1 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 -+ store <16 x i16> , ptr %1, align 2 -+ %2 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 -+ store <16 x i16> , ptr %2, align 2 -+ call void @foo(ptr noundef nonnull %s) -+ store <16 x i16> , ptr %s, align 2 -+ %3 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 -+ store <16 x i16> , ptr %3, align 2 -+ %4 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 -+ store <16 x i16> , ptr %4, align 2 -+ %5 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 -+ store <16 x i16> , ptr %5, align 2 -+ call void @bar(ptr noundef nonnull %s) -+ call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %s) -+ ret i32 0 -+} -+ -+declare void @foo(ptr nocapture noundef) -+declare void @bar(ptr nocapture noundef) -+ -+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) -+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) --- -2.20.1 - - -From e3e2d0c2cb7cfaffe2663f5f8607dad09fcdf3a5 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Tue, 9 Jan 2024 20:35:49 +0800 -Subject: [PATCH 35/35] [LoongArch] Implement - LoongArchRegisterInfo::canRealignStack() (#76913) - -This patch fixes the crash issue in the test: -CodeGen/LoongArch/can-not-realign-stack.ll - -Register allocator may spill virtual registers to the stack, which -introduces stack alignment requirements (when the size of spilled - registers exceeds the default alignment size of the stack). If a -function does not have stack alignment requirements before register -allocation, registers used for stack alignment will not be preserved. - -Therefore, we should implement `canRealignStack()` to inform the -register allocator whether it is allowed to perform stack realignment -operations. - -(cherry picked from commit 98c6aa72299caeff6b188e1ff2fc1b39c5b893b6) ---- - .../LoongArch/LoongArchRegisterInfo.cpp | 23 ++++++++ - .../Target/LoongArch/LoongArchRegisterInfo.h | 1 + - .../LoongArch/can-not-realign-stack.ll | 56 +++++++++++++++++-- - 3 files changed, 75 insertions(+), 5 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -index 257b947a3ce4..092b5f1fb442 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -@@ -15,6 +15,7 @@ - #include "LoongArch.h" - #include "LoongArchInstrInfo.h" - #include "LoongArchSubtarget.h" -+#include "MCTargetDesc/LoongArchBaseInfo.h" - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "llvm/CodeGen/MachineFrameInfo.h" - #include "llvm/CodeGen/MachineFunction.h" -@@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); - return false; - } -+ -+bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { -+ if (!TargetRegisterInfo::canRealignStack(MF)) -+ return false; -+ -+ const MachineRegisterInfo *MRI = &MF.getRegInfo(); -+ const LoongArchFrameLowering *TFI = getFrameLowering(MF); -+ -+ // Stack realignment requires a frame pointer. If we already started -+ // register allocation with frame pointer elimination, it is too late now. -+ if (!MRI->canReserveReg(LoongArch::R22)) -+ return false; -+ -+ // We may also need a base pointer if there are dynamic allocas or stack -+ // pointer adjustments around calls. -+ if (TFI->hasReservedCallFrame(MF)) -+ return true; -+ -+ // A base pointer is required and allowed. Check that it isn't too late to -+ // reserve it. -+ return MRI->canReserveReg(LoongArchABI::getBPReg()); -+} -diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -index 7e8f26b14097..d1e40254c297 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -@@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } -+ bool canRealignStack(const MachineFunction &MF) const override; - }; - } // end namespace llvm - -diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -index 526821076498..af24ae64b7c7 100644 ---- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll -@@ -1,14 +1,60 @@ --; REQUIRES: expensive_checks --; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -+; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s | FileCheck %s - --; XFAIL: * -+;; This test is checking that when a function allows stack realignment and -+;; realignment needs were not detected before register allocation (at this -+;; point, fp is not preserved), but realignment is required during register -+;; allocation, the stack should not undergo realignment. - --;; FIXME: This test will crash with expensive check. The subsequent patch will --;; address and fix this issue. -+;; Ensure that the `bstrins.d $sp, $zero, n, 0` instruction is not generated. -+;; n = log2(realign_size) - 1 - - %struct.S = type { [64 x i16] } - - define dso_local noundef signext i32 @main() nounwind { -+; CHECK-LABEL: main: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: addi.d $sp, $sp, -272 -+; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1) -+; CHECK-NEXT: xvld $xr1, $a0, 0 -+; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr1, $sp, 224 -+; CHECK-NEXT: xvst $xr0, $sp, 192 -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr0, $sp, 160 -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3) -+; CHECK-NEXT: xvld $xr0, $a0, 0 -+; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill -+; CHECK-NEXT: xvst $xr0, $sp, 128 -+; CHECK-NEXT: addi.d $fp, $sp, 128 -+; CHECK-NEXT: move $a0, $fp -+; CHECK-NEXT: bl %plt(foo) -+; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 224 -+; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 192 -+; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 160 -+; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -+; CHECK-NEXT: xvst $xr0, $sp, 128 -+; CHECK-NEXT: move $a0, $fp -+; CHECK-NEXT: bl %plt(bar) -+; CHECK-NEXT: move $a0, $zero -+; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 272 -+; CHECK-NEXT: ret - entry: - %s = alloca %struct.S, align 2 - call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) --- -2.20.1 - diff --git a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch b/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch deleted file mode 100644 index e40be810e6d9e84bf18bc7a115fcb81a212cc5de..0000000000000000000000000000000000000000 --- a/0012-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch +++ /dev/null @@ -1,2474 +0,0 @@ -From 0bce68310dc0ff6a09ec2cf5c3ae32400c631324 Mon Sep 17 00:00:00 2001 -From: zhanglimin -Date: Tue, 12 Sep 2023 09:51:16 +0800 -Subject: [PATCH 01/14] [sanitizer][msan] VarArgHelper for loongarch64 - -This patch adds support for variadic argument for loongarch64, -which is based on MIPS64. And `check-msan` all pass. - -Reviewed By: vitalybuka - -Differential Revision: https://reviews.llvm.org/D158587 - -(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3) ---- - .../Instrumentation/MemorySanitizer.cpp | 7 ++ - .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++ - 2 files changed, 85 insertions(+) - create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll - -diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -index 83d90049abc3..362fd6e4151f 100644 ---- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper { - }; - - /// MIPS64-specific implementation of VarArgHelper. -+/// NOTE: This is also used for LoongArch64. - struct VarArgMIPS64Helper : public VarArgHelper { - Function &F; - MemorySanitizer &MS; -@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper { - } - }; - -+// Loongarch64 is not a MIPS, but the current vargs calling convention matches -+// the MIPS. -+using VarArgLoongArch64Helper = VarArgMIPS64Helper; -+ - /// A no-op implementation of VarArgHelper. - struct VarArgNoOpHelper : public VarArgHelper { - VarArgNoOpHelper(Function &F, MemorySanitizer &MS, -@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, - return new VarArgPowerPC64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == Triple::systemz) - return new VarArgSystemZHelper(Func, Msan, Visitor); -+ else if (TargetTriple.isLoongArch64()) -+ return new VarArgLoongArch64Helper(Func, Msan, Visitor); - else - return new VarArgNoOpHelper(Func, Msan, Visitor); - } -diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -new file mode 100644 -index 000000000000..8a4ab59588ad ---- /dev/null -+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -@@ -0,0 +1,78 @@ -+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s -+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64-unknown-linux-gnu" -+ -+;; First, check allocation of the save area. -+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 -+declare void @llvm.va_start(ptr) #2 -+declare void @llvm.va_end(ptr) #2 -+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 -+define i32 @foo(i32 %guard, ...) { -+; CHECK-LABEL: @foo -+; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -+; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] -+; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] -+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) -+; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) -+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) -+; -+ %vl = alloca ptr, align 8 -+ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) -+ call void @llvm.va_start(ptr %vl) -+ call void @llvm.va_end(ptr %vl) -+ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) -+ ret i32 0 -+} -+ -+;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls -+;; array. -+define i32 @bar() { -+; CHECK-LABEL: @bar -+; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Check multiple fixed arguments. -+declare i32 @foo2(i32 %g1, i32 %g2, ...) -+define i32 @bar2() { -+; CHECK-LABEL: @bar2 -+; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are -+;; passed to a variadic function. -+declare i64 @sum(i64 %n, ...) -+define dso_local i64 @many_args() { -+;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. -+; CHECK-LABEL: @many_args -+; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) -+; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) -+; -+entry: -+ %ret = call i64 (i64, ...) @sum(i64 120, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 -+ ) -+ ret i64 %ret -+} --- -2.20.1 - - -From f1265a12fa947b79967552ab520f904486c76353 Mon Sep 17 00:00:00 2001 -From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> -Date: Thu, 28 Sep 2023 15:26:18 +0800 -Subject: [PATCH 02/14] [LowerTypeTests] Add loongarch64 to CFI jumptables - (#67312) - -This patch implements jump tables for loongarch64. - -(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18) ---- - llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++- - llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++ - llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++ - 3 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -index 9b4b3efd7283..a89d57d12615 100644 ---- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp -@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4; - static const unsigned kARMBTIJumpTableEntrySize = 8; - static const unsigned kARMv6MJumpTableEntrySize = 16; - static const unsigned kRISCVJumpTableEntrySize = 8; -+static const unsigned kLOONGARCH64JumpTableEntrySize = 8; - - unsigned LowerTypeTestsModule::getJumpTableEntrySize() { - switch (JumpTableArch) { -@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { - case Triple::riscv32: - case Triple::riscv64: - return kRISCVJumpTableEntrySize; -+ case Triple::loongarch64: -+ return kLOONGARCH64JumpTableEntrySize; - default: - report_fatal_error("Unsupported architecture for jump tables"); - } -@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry( - } else if (JumpTableArch == Triple::riscv32 || - JumpTableArch == Triple::riscv64) { - AsmOS << "tail $" << ArgIndex << "@plt\n"; -+ } else if (JumpTableArch == Triple::loongarch64) { -+ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" -+ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; - } else { - report_fatal_error("Unsupported architecture for jump tables"); - } -@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions( - ArrayRef TypeIds, ArrayRef Functions) { - if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || - Arch == Triple::thumb || Arch == Triple::aarch64 || -- Arch == Triple::riscv32 || Arch == Triple::riscv64) -+ Arch == Triple::riscv32 || Arch == Triple::riscv64 || -+ Arch == Triple::loongarch64) - buildBitSetsFromFunctionsNative(TypeIds, Functions); - else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) - buildBitSetsFromFunctionsWASM(TypeIds, Functions); -diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll -index ff69abacc8e9..c765937f1991 100644 ---- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll -+++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll -@@ -4,6 +4,7 @@ - ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s -+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s - - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" -@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) { - ; X86: define private void @[[JT]]() #{{.*}} align 8 { - ; ARM: define private void @[[JT]]() #{{.*}} align 4 { - ; RISCV: define private void @[[JT]]() #{{.*}} align 8 { -+; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 { - - ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" { - ; CHECK-NEXT: entry: -diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll -index 968c9d434eb2..802b88d92977 100644 ---- a/llvm/test/Transforms/LowerTypeTests/function.ll -+++ b/llvm/test/Transforms/LowerTypeTests/function.ll -@@ -5,6 +5,7 @@ - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s - ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s - ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s -+; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s - - ; The right format for Arm jump tables depends on the selected - ; subtarget, so we can't get these tests right without the Arm target -@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64" - ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1) - ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1) - ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) -+; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) - - ; NATIVE: define hidden void @f.cfi() - ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]] -@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) { - ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 { - ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 { - ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 { -+; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { - - ; X86: jmp ${0:c}@plt - ; X86-SAME: int3 -@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) { - ; RISCV: tail $0@plt - ; RISCV-SAME: tail $1@plt - -+; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0) -+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0) -+; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1) -+; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1) -+ - ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi) - - ; X86-LINUX: attributes #[[ATTR]] = { naked nocf_check nounwind } -@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) { - ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" } - ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" } - ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" } -+; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind } - - ; WASM32: ![[I0]] = !{i64 1} - ; WASM32: ![[I1]] = !{i64 2} --- -2.20.1 - - -From 6f3143e1ad0bb759b7519af81994ed3c71dcf52b Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Fri, 20 Oct 2023 10:44:55 +0800 -Subject: [PATCH 03/14] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR - -The immediate argument should be a target constant (`timm`). - -(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef) ---- - llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index b2c4bb812ba5..166379d7d592 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; - /// Intrinsics - - def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), -- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; -+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; - def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), -- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; -+ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; - def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; - def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; - def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; -@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), - def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), - (ASRTGT_D GPR:$rj, GPR:$rk)>; - def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), -- (LDDIR GPR:$rj, uimm8:$imm8)>; -+ (LDDIR GPR:$rj, timm:$imm8)>; - def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), -- (LDPTE GPR:$rj, uimm8:$imm8)>; -+ (LDPTE GPR:$rj, timm:$imm8)>; - } // Predicates = [IsLA64] - - //===----------------------------------------------------------------------===// --- -2.20.1 - - -From d90b85e94180543fd1789f9e26d7931f2329069b Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Fri, 10 Nov 2023 15:54:33 +0800 -Subject: [PATCH 04/14] [LoongArch][MC] Refine MCInstrAnalysis based on - registers used (#71276) - -MCInstrAnalysis can return properties of instructions (e.g., isCall(), -isBranch(),...) based on the informations that MCInstrDesc can get from -*InstrInfo*.td files. These infos are based on opcodes only, but JIRL -can have different properties based on different registers used. - -So this patch refines several MCInstrAnalysis methods: isTerminator, -isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch. - -This patch also allows BOLT which will be supported on LoongArch later -to get right instruction infos. - -(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1) ---- - .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++ - .../unittests/Target/LoongArch/CMakeLists.txt | 1 + - .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++ - 3 files changed, 184 insertions(+) - create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -index 942e667bc261..d580c3457fec 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -@@ -104,6 +104,82 @@ public: - - return false; - } -+ -+ bool isTerminator(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isTerminator(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0; -+ } -+ } -+ -+ bool isCall(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isCall(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() != LoongArch::R0; -+ } -+ } -+ -+ bool isReturn(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isReturn(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() == LoongArch::R1; -+ } -+ } -+ -+ bool isBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } -+ -+ bool isUnconditionalBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isUnconditionalBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } -+ -+ bool isIndirectBranch(const MCInst &Inst) const override { -+ if (MCInstrAnalysis::isIndirectBranch(Inst)) -+ return true; -+ -+ switch (Inst.getOpcode()) { -+ default: -+ return false; -+ case LoongArch::JIRL: -+ return Inst.getOperand(0).getReg() == LoongArch::R0 && -+ Inst.getOperand(1).getReg() != LoongArch::R1; -+ } -+ } - }; - - } // end namespace -diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt -index fef4f8e15461..e6f8ec073721 100644 ---- a/llvm/unittests/Target/LoongArch/CMakeLists.txt -+++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt -@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS - - add_llvm_target_unittest(LoongArchTests - InstSizes.cpp -+ MCInstrAnalysisTest.cpp - ) - - set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -new file mode 100644 -index 000000000000..6a208d274a0d ---- /dev/null -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -0,0 +1,107 @@ -+//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/MC/MCInstrAnalysis.h" -+#include "MCTargetDesc/LoongArchMCTargetDesc.h" -+#include "llvm/MC/MCInstBuilder.h" -+#include "llvm/MC/TargetRegistry.h" -+#include "llvm/Support/TargetSelect.h" -+ -+#include "gtest/gtest.h" -+ -+#include -+ -+using namespace llvm; -+ -+namespace { -+ -+class InstrAnalysisTest : public testing::TestWithParam { -+protected: -+ std::unique_ptr Info; -+ std::unique_ptr Analysis; -+ -+ static void SetUpTestSuite() { -+ LLVMInitializeLoongArchTargetInfo(); -+ LLVMInitializeLoongArchTarget(); -+ LLVMInitializeLoongArchTargetMC(); -+ } -+ -+ InstrAnalysisTest() { -+ std::string Error; -+ const Target *TheTarget = -+ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); -+ Info = std::unique_ptr(TheTarget->createMCInstrInfo()); -+ Analysis = std::unique_ptr( -+ TheTarget->createMCInstrAnalysis(Info.get())); -+ } -+}; -+ -+} // namespace -+ -+static MCInst beq() { -+ return MCInstBuilder(LoongArch::BEQ) -+ .addReg(LoongArch::R0) -+ .addReg(LoongArch::R1) -+ .addImm(32); -+} -+ -+static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } -+ -+static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { -+ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); -+} -+ -+TEST_P(InstrAnalysisTest, IsTerminator) { -+ EXPECT_TRUE(Analysis->isTerminator(beq())); -+ EXPECT_FALSE(Analysis->isTerminator(bl())); -+ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); -+} -+ -+TEST_P(InstrAnalysisTest, IsCall) { -+ EXPECT_FALSE(Analysis->isCall(beq())); -+ EXPECT_TRUE(Analysis->isCall(bl())); -+ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); -+} -+ -+TEST_P(InstrAnalysisTest, IsReturn) { -+ EXPECT_FALSE(Analysis->isReturn(beq())); -+ EXPECT_FALSE(Analysis->isReturn(bl())); -+ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsBranch) { -+ EXPECT_TRUE(Analysis->isBranch(beq())); -+ EXPECT_FALSE(Analysis->isBranch(bl())); -+ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); -+ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE( -+ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+TEST_P(InstrAnalysisTest, IsIndirectBranch) { -+ EXPECT_FALSE(Analysis->isIndirectBranch(beq())); -+ EXPECT_FALSE(Analysis->isIndirectBranch(bl())); -+ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); -+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); -+ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); -+} -+ -+INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, -+ testing::Values("loongarch32", "loongarch64")); --- -2.20.1 - - -From 4d3ba0892d66b21f6a8a72f1d787e42a64be8867 Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Wed, 15 Nov 2023 11:12:30 +0800 -Subject: [PATCH 05/14] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for - instruction 'b' (#71903) - -The tests for 'b' which commented with FIXME are incorrect, the -following patch will fix it. - -(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4) ---- - .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -index 6a208d274a0d..6e1919fc2261 100644 ---- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -50,6 +50,8 @@ static MCInst beq() { - .addImm(32); - } - -+static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } -+ - static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } - - static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { -@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { - - TEST_P(InstrAnalysisTest, IsTerminator) { - EXPECT_TRUE(Analysis->isTerminator(beq())); -+ EXPECT_TRUE(Analysis->isTerminator(b())); - EXPECT_FALSE(Analysis->isTerminator(bl())); - EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); -@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) { - - TEST_P(InstrAnalysisTest, IsCall) { - EXPECT_FALSE(Analysis->isCall(beq())); -+ EXPECT_FALSE(Analysis->isCall(b())); - EXPECT_TRUE(Analysis->isCall(bl())); - EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); - EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); -@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) { - - TEST_P(InstrAnalysisTest, IsReturn) { - EXPECT_FALSE(Analysis->isReturn(beq())); -+ EXPECT_FALSE(Analysis->isReturn(b())); - EXPECT_FALSE(Analysis->isReturn(bl())); - EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); - EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); -@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) { - - TEST_P(InstrAnalysisTest, IsBranch) { - EXPECT_TRUE(Analysis->isBranch(beq())); -+ EXPECT_TRUE(Analysis->isBranch(b())); - EXPECT_FALSE(Analysis->isBranch(bl())); - EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); - EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); - } - -+TEST_P(InstrAnalysisTest, IsConditionalBranch) { -+ EXPECT_TRUE(Analysis->isConditionalBranch(beq())); -+ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is -+ // wrong. The following patch will fix it. -+ EXPECT_TRUE(Analysis->isConditionalBranch(b())); -+ EXPECT_FALSE(Analysis->isConditionalBranch(bl())); -+} -+ - TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -+ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is -+ // wrong. The following patch will fix it. -+ EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); - EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); - EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); -@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - - TEST_P(InstrAnalysisTest, IsIndirectBranch) { - EXPECT_FALSE(Analysis->isIndirectBranch(beq())); -+ EXPECT_FALSE(Analysis->isIndirectBranch(b())); - EXPECT_FALSE(Analysis->isIndirectBranch(bl())); - EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); --- -2.20.1 - - -From 034d4087be71c54248fff1bf7eae66291671776a Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Thu, 16 Nov 2023 14:01:58 +0800 -Subject: [PATCH 06/14] [LoongArch] Set isBarrier to true for instruction 'b' - (#72339) - -Instr "b offs26" represent to an unconditional branch in LoongArch. Set -isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return -correctly. - -Fixes https://github.com/llvm/llvm-project/pull/71903. - -(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f) ---- - llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 + - llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------ - 2 files changed, 3 insertions(+), 6 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 166379d7d592..05ae36a9781d 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -586,6 +586,7 @@ class Br_I26 op> - : FmtI26 { - let isBranch = 1; - let isTerminator = 1; -+ let isBarrier = 1; - } - } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 - -diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -index 6e1919fc2261..468ee79615d6 100644 ---- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -+++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp -@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) { - - TEST_P(InstrAnalysisTest, IsConditionalBranch) { - EXPECT_TRUE(Analysis->isConditionalBranch(beq())); -- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is -- // wrong. The following patch will fix it. -- EXPECT_TRUE(Analysis->isConditionalBranch(b())); -+ EXPECT_FALSE(Analysis->isConditionalBranch(b())); - EXPECT_FALSE(Analysis->isConditionalBranch(bl())); - } - - TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { - EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); -- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is -- // wrong. The following patch will fix it. -- EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); -+ EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); - EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); - EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); - EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); --- -2.20.1 - - -From 701109dc419b8d07cd5254268d848dee1278b9ad Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Tue, 21 Nov 2023 08:34:52 +0800 -Subject: [PATCH 07/14] [LoongArch][MC] Pre-commit tests for instr bl fixupkind - testing (#72826) - -This patch is used to test whether fixupkind for bl can be returned -correctly. When BL has target-flags(loongarch-call), there is no error. -But without this flag, an assertion error will appear. So the test is -just tagged as "Expectedly Failed" now until the following patch fix it. - -(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a) ---- - .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++ - 1 file changed, 66 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir - -diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -new file mode 100644 -index 000000000000..2c1d41be7711 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -@@ -0,0 +1,66 @@ -+## Tagged as "Expectedly Failed" until the following patch fix it -+# XFAIL: * -+# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ -+# RUN: llvm-objdump -d - | FileCheck %s -+ -+# REQUIRES: asserts -+ -+## Check that bl can get fixupkind correctly. -+## When BL has target-flags(loongarch-call), there is no error. But without -+## this flag, an assertion error will appear: -+## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. -+ -+--- | -+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+ target triple = "loongarch64" -+ -+ define dso_local void @test_bl_fixupkind_with_flag() { -+ ; CHECK-LABEL: test_bl_fixupkind_with_flag -+ ; CHECK: addi.d $sp, $sp, -16 -+ ; CHECK-NEXT: st.d $ra, $sp, 8 -+ ; CHECK-NEXT: bl 0 -+ ; CHECK-NEXT: ld.d $ra, $sp, 8 -+ ; CHECK-NEXT: addi.d $sp, $sp, 16 -+ ; CHECK-NEXT: ret -+ entry: -+ call void @foo() -+ ret void -+ } -+ -+ define dso_local void @test_bl_fixupkind_without_flag() { -+ ; CHECK-LABEL: test_bl_fixupkind_without_flag -+ ; CHECK: addi.d $sp, $sp, -16 -+ ; CHECK-NEXT: st.d $ra, $sp, 8 -+ ; CHECK-NEXT: bl 0 -+ ; CHECK-NEXT: ld.d $ra, $sp, 8 -+ ; CHECK-NEXT: addi.d $sp, $sp, 16 -+ ; CHECK-NEXT: ret -+ entry: -+ call void @foo() -+ ret void -+ } -+ -+ declare dso_local void @foo(...) -+... -+--- -+name: test_bl_fixupkind_with_flag -+tracksRegLiveness: true -+body: | -+ bb.0.entry: -+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 -+ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 -+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 -+ PseudoRET -+ -+... -+--- -+name: test_bl_fixupkind_without_flag -+tracksRegLiveness: true -+body: | -+ bb.0.entry: -+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 -+ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 -+ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 -+ PseudoRET -+ -+... --- -2.20.1 - - -From a5bf03107b8738b0fab521d7718bed863056134b Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Tue, 21 Nov 2023 19:00:29 +0800 -Subject: [PATCH 08/14] [LoongArch][MC] Support to get the FixupKind for BL - (#72938) - -Previously, bolt could not get FixupKind for BL correctly, because bolt -cannot get target-flags for BL. Here just add support in MCCodeEmitter. - -Fixes https://github.com/llvm/llvm-project/pull/72826. - -(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9) ---- - .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 + - llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------ - 2 files changed, 3 insertions(+), 6 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -index 08c0820cb862..09d92ac9aa3a 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, - FixupKind = LoongArch::fixup_loongarch_b21; - break; - case LoongArch::B: -+ case LoongArch::BL: - FixupKind = LoongArch::fixup_loongarch_b26; - break; - } -diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -index 2c1d41be7711..70cd5fb8d7eb 100644 ---- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -+++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir -@@ -1,14 +1,10 @@ --## Tagged as "Expectedly Failed" until the following patch fix it --# XFAIL: * - # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ - # RUN: llvm-objdump -d - | FileCheck %s - - # REQUIRES: asserts - --## Check that bl can get fixupkind correctly. --## When BL has target-flags(loongarch-call), there is no error. But without --## this flag, an assertion error will appear: --## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. -+## Check that bl can get fixupkind correctly, whether BL contains -+## target-flags(loongarch-call) or not. - - --- | - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" --- -2.20.1 - - -From 20421e57af53d963a95c6c318f71f9399d241188 Mon Sep 17 00:00:00 2001 -From: ZhaoQi -Date: Thu, 23 Nov 2023 16:38:41 +0800 -Subject: [PATCH 09/14] [LoongArch][MC] Modify branch evaluation for - MCInstrAnalysis (#73205) - -Function evaluateBranch() is used to compute target address for a given -branch instruction and return true on success. But target address of -indirect branch cannot be simply added, so rule it out and just return -false. - -This patch also add objdump tests which capture the current state of -support for printing branch targets. Without this patch, the result of -"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 ". It is obviously -incorrect, because this instruction represents an indirect branch whose -target address depends on both the register value and the imm. After -this patch, it will be right despite loss of details. - -(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c) ---- - .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +- - .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++ - .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 + - 3 files changed, 80 insertions(+), 1 deletion(-) - create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s - create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -index d580c3457fec..a4e6a09863e6 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -@@ -97,7 +97,8 @@ public: - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { - unsigned NumOps = Inst.getNumOperands(); -- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { -+ if ((isBranch(Inst) && !isIndirectBranch(Inst)) || -+ Inst.getOpcode() == LoongArch::BL) { - Target = Addr + Inst.getOperand(NumOps - 1).getImm(); - return true; - } -diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s -new file mode 100644 -index 000000000000..8cb00aef9954 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s -@@ -0,0 +1,76 @@ -+# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ -+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s -+# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ -+# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s -+ -+# CHECK-LABEL: : -+foo: -+# CHECK: beq $a0, $a1, 108 -+beq $a0, $a1, .Llocal -+# CHECK: bne $a0, $a1, 104 -+bne $a0, $a1, .Llocal -+# CHECK: blt $a0, $a1, 100 -+blt $a0, $a1, .Llocal -+# CHECK: bltu $a0, $a1, 96 -+bltu $a0, $a1, .Llocal -+# CHECK: bge $a0, $a1, 92 -+bge $a0, $a1, .Llocal -+# CHECK: bgeu $a0, $a1, 88 -+bgeu $a0, $a1, .Llocal -+# CHECK: beqz $a0, 84 -+beqz $a0, .Llocal -+# CHECK: bnez $a0, 80 -+bnez $a0, .Llocal -+# CHECK: bceqz $fcc6, 76 -+bceqz $fcc6, .Llocal -+# CHECK: bcnez $fcc6, 72 -+bcnez $fcc6, .Llocal -+ -+# CHECK: beq $a0, $a1, 76 -+beq $a0, $a1, bar -+# CHECK: bne $a0, $a1, 72 -+bne $a0, $a1, bar -+# CHECK: blt $a0, $a1, 68 -+blt $a0, $a1, bar -+# CHECK: bltu $a0, $a1, 64 -+bltu $a0, $a1, bar -+# CHECK: bge $a0, $a1, 60 -+bge $a0, $a1, bar -+# CHECK: bgeu $a0, $a1, 56 -+bgeu $a0, $a1, bar -+# CHECK: beqz $a0, 52 -+beqz $a0, bar -+# CHECK: bnez $a0, 48 -+bnez $a0, bar -+# CHECK: bceqz $fcc6, 44 -+bceqz $fcc6, bar -+# CHECK: bcnez $fcc6, 40 -+bcnez $fcc6, bar -+ -+# CHECK: b 28 -+b .Llocal -+# CHECK: b 32 -+b bar -+ -+# CHECK: bl 20 -+bl .Llocal -+# CHECK: bl 24 -+bl bar -+ -+# CHECK: jirl $zero, $a0, 4{{$}} -+jirl $zero, $a0, 4 -+# CHECK: jirl $ra, $a0, 4{{$}} -+jirl $ra, $a0, 4 -+# CHECK: ret -+ret -+ -+.Llocal: -+# CHECK: 6c: nop -+# CHECK: nop -+nop -+nop -+ -+# CHECK-LABEL: : -+bar: -+# CHECK: 74: nop -+nop -diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg -new file mode 100644 -index 000000000000..cc24278acbb4 ---- /dev/null -+++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg -@@ -0,0 +1,2 @@ -+if not "LoongArch" in config.root.targets: -+ config.unsupported = True --- -2.20.1 - - -From 0fe85205a8637c6671f423cddd41b712085232ac Mon Sep 17 00:00:00 2001 -From: hev -Date: Thu, 23 Nov 2023 15:15:26 +0800 -Subject: [PATCH 10/14] [LoongArch] Precommit a test for smul with overflow - (NFC) (#73212) - -(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71) ---- - .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++ - 1 file changed, 118 insertions(+) - create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll - -diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -new file mode 100644 -index 000000000000..a53e77e5aa4b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -@@ -0,0 +1,118 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 -+ -+define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { -+; LA32-LABEL: smuloi64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: .cfi_offset 22, -8 -+; LA32-NEXT: move $fp, $a4 -+; LA32-NEXT: st.w $zero, $sp, 4 -+; LA32-NEXT: addi.w $a4, $sp, 4 -+; LA32-NEXT: bl %plt(__mulodi4) -+; LA32-NEXT: st.w $a1, $fp, 4 -+; LA32-NEXT: st.w $a0, $fp, 0 -+; LA32-NEXT: ld.w $a0, $sp, 4 -+; LA32-NEXT: sltu $a0, $zero, $a0 -+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: smuloi64: -+; LA64: # %bb.0: -+; LA64-NEXT: mul.d $a3, $a0, $a1 -+; LA64-NEXT: st.d $a3, $a2, 0 -+; LA64-NEXT: mulh.d $a0, $a0, $a1 -+; LA64-NEXT: srai.d $a1, $a3, 63 -+; LA64-NEXT: xor $a0, $a0, $a1 -+; LA64-NEXT: sltu $a0, $zero, $a0 -+; LA64-NEXT: ret -+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) -+ %val = extractvalue {i64, i1} %t, 0 -+ %obit = extractvalue {i64, i1} %t, 1 -+ store i64 %val, ptr %res -+ ret i1 %obit -+} -+ -+define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { -+; LA32-LABEL: smuloi128: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -64 -+; LA32-NEXT: .cfi_def_cfa_offset 64 -+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: .cfi_offset 22, -8 -+; LA32-NEXT: move $fp, $a2 -+; LA32-NEXT: st.w $zero, $sp, 52 -+; LA32-NEXT: ld.w $a2, $a1, 12 -+; LA32-NEXT: st.w $a2, $sp, 12 -+; LA32-NEXT: ld.w $a2, $a1, 8 -+; LA32-NEXT: st.w $a2, $sp, 8 -+; LA32-NEXT: ld.w $a2, $a1, 4 -+; LA32-NEXT: st.w $a2, $sp, 4 -+; LA32-NEXT: ld.w $a1, $a1, 0 -+; LA32-NEXT: st.w $a1, $sp, 0 -+; LA32-NEXT: ld.w $a1, $a0, 12 -+; LA32-NEXT: st.w $a1, $sp, 28 -+; LA32-NEXT: ld.w $a1, $a0, 8 -+; LA32-NEXT: st.w $a1, $sp, 24 -+; LA32-NEXT: ld.w $a1, $a0, 4 -+; LA32-NEXT: st.w $a1, $sp, 20 -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: st.w $a0, $sp, 16 -+; LA32-NEXT: addi.w $a0, $sp, 32 -+; LA32-NEXT: addi.w $a1, $sp, 16 -+; LA32-NEXT: addi.w $a2, $sp, 0 -+; LA32-NEXT: addi.w $a3, $sp, 52 -+; LA32-NEXT: bl %plt(__muloti4) -+; LA32-NEXT: ld.w $a0, $sp, 44 -+; LA32-NEXT: st.w $a0, $fp, 12 -+; LA32-NEXT: ld.w $a0, $sp, 40 -+; LA32-NEXT: st.w $a0, $fp, 8 -+; LA32-NEXT: ld.w $a0, $sp, 36 -+; LA32-NEXT: st.w $a0, $fp, 4 -+; LA32-NEXT: ld.w $a0, $sp, 32 -+; LA32-NEXT: st.w $a0, $fp, 0 -+; LA32-NEXT: ld.w $a0, $sp, 52 -+; LA32-NEXT: sltu $a0, $zero, $a0 -+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 64 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: smuloi128: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.d $sp, $sp, -32 -+; LA64-NEXT: .cfi_def_cfa_offset 32 -+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -+; LA64-NEXT: .cfi_offset 1, -8 -+; LA64-NEXT: .cfi_offset 22, -16 -+; LA64-NEXT: move $fp, $a4 -+; LA64-NEXT: st.d $zero, $sp, 8 -+; LA64-NEXT: addi.d $a4, $sp, 8 -+; LA64-NEXT: bl %plt(__muloti4) -+; LA64-NEXT: st.d $a1, $fp, 8 -+; LA64-NEXT: st.d $a0, $fp, 0 -+; LA64-NEXT: ld.d $a0, $sp, 8 -+; LA64-NEXT: sltu $a0, $zero, $a0 -+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload -+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -+; LA64-NEXT: addi.d $sp, $sp, 32 -+; LA64-NEXT: ret -+ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) -+ %val = extractvalue {i128, i1} %t, 0 -+ %obit = extractvalue {i128, i1} %t, 1 -+ store i128 %val, ptr %res -+ ret i1 %obit -+} -+ -+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone -+declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone --- -2.20.1 - - -From e29ff285726046ec46c9005c67ba992e3efc8ace Mon Sep 17 00:00:00 2001 -From: hev -Date: Thu, 23 Nov 2023 19:34:50 +0800 -Subject: [PATCH 11/14] [LoongArch] Disable mulodi4 and muloti4 libcalls - (#73199) - -This library function only exists in compiler-rt not libgcc. So this -would fail to link unless we were linking with compiler-rt. - -Fixes https://github.com/ClangBuiltLinux/linux/issues/1958 - -(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 + - .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++--- - 2 files changed, 397 insertions(+), 71 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index f7eacd56c542..ed106cb766bc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - - // Set libcalls. - setLibcallName(RTLIB::MUL_I128, nullptr); -+ // The MULO libcall is not part of libgcc, only compiler-rt. -+ setLibcallName(RTLIB::MULO_I64, nullptr); - } - -+ // The MULO libcall is not part of libgcc, only compiler-rt. -+ setLibcallName(RTLIB::MULO_I128, nullptr); -+ - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; -diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -index a53e77e5aa4b..6cba4108d63c 100644 ---- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll -@@ -5,23 +5,53 @@ - define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { - ; LA32-LABEL: smuloi64: - ; LA32: # %bb.0: --; LA32-NEXT: addi.w $sp, $sp, -16 --; LA32-NEXT: .cfi_def_cfa_offset 16 --; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill --; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill --; LA32-NEXT: .cfi_offset 1, -4 --; LA32-NEXT: .cfi_offset 22, -8 --; LA32-NEXT: move $fp, $a4 --; LA32-NEXT: st.w $zero, $sp, 4 --; LA32-NEXT: addi.w $a4, $sp, 4 --; LA32-NEXT: bl %plt(__mulodi4) --; LA32-NEXT: st.w $a1, $fp, 4 --; LA32-NEXT: st.w $a0, $fp, 0 --; LA32-NEXT: ld.w $a0, $sp, 4 -+; LA32-NEXT: srai.w $a5, $a1, 31 -+; LA32-NEXT: mul.w $a6, $a2, $a5 -+; LA32-NEXT: mulh.wu $a7, $a2, $a5 -+; LA32-NEXT: add.w $a7, $a7, $a6 -+; LA32-NEXT: mul.w $a5, $a3, $a5 -+; LA32-NEXT: add.w $a5, $a7, $a5 -+; LA32-NEXT: srai.w $a7, $a3, 31 -+; LA32-NEXT: mul.w $t0, $a7, $a1 -+; LA32-NEXT: mulh.wu $t1, $a7, $a0 -+; LA32-NEXT: add.w $t0, $t1, $t0 -+; LA32-NEXT: mul.w $a7, $a7, $a0 -+; LA32-NEXT: add.w $t0, $t0, $a7 -+; LA32-NEXT: add.w $a5, $t0, $a5 -+; LA32-NEXT: mulh.wu $t0, $a0, $a2 -+; LA32-NEXT: mul.w $t1, $a1, $a2 -+; LA32-NEXT: add.w $t0, $t1, $t0 -+; LA32-NEXT: sltu $t1, $t0, $t1 -+; LA32-NEXT: mulh.wu $t2, $a1, $a2 -+; LA32-NEXT: add.w $t1, $t2, $t1 -+; LA32-NEXT: mul.w $t2, $a0, $a3 -+; LA32-NEXT: add.w $t0, $t2, $t0 -+; LA32-NEXT: sltu $t2, $t0, $t2 -+; LA32-NEXT: mulh.wu $t3, $a0, $a3 -+; LA32-NEXT: add.w $t2, $t3, $t2 -+; LA32-NEXT: add.w $a6, $a7, $a6 -+; LA32-NEXT: sltu $a7, $a6, $a7 -+; LA32-NEXT: add.w $a5, $a5, $a7 -+; LA32-NEXT: mul.w $a0, $a0, $a2 -+; LA32-NEXT: mul.w $a2, $a1, $a3 -+; LA32-NEXT: mulh.wu $a1, $a1, $a3 -+; LA32-NEXT: add.w $a3, $t1, $t2 -+; LA32-NEXT: sltu $a7, $a3, $t1 -+; LA32-NEXT: add.w $a1, $a1, $a7 -+; LA32-NEXT: st.w $a0, $a4, 0 -+; LA32-NEXT: add.w $a0, $a2, $a3 -+; LA32-NEXT: sltu $a2, $a0, $a2 -+; LA32-NEXT: add.w $a1, $a1, $a2 -+; LA32-NEXT: st.w $t0, $a4, 4 -+; LA32-NEXT: add.w $a1, $a1, $a5 -+; LA32-NEXT: add.w $a2, $a0, $a6 -+; LA32-NEXT: sltu $a0, $a2, $a0 -+; LA32-NEXT: add.w $a0, $a1, $a0 -+; LA32-NEXT: srai.w $a1, $t0, 31 -+; LA32-NEXT: xor $a0, $a0, $a1 -+; LA32-NEXT: xor $a1, $a2, $a1 -+; LA32-NEXT: or $a0, $a1, $a0 - ; LA32-NEXT: sltu $a0, $zero, $a0 --; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload --; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload --; LA32-NEXT: addi.w $sp, $sp, 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: smuloi64: -@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { - define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { - ; LA32-LABEL: smuloi128: - ; LA32: # %bb.0: --; LA32-NEXT: addi.w $sp, $sp, -64 --; LA32-NEXT: .cfi_def_cfa_offset 64 --; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill --; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: addi.w $sp, $sp, -96 -+; LA32-NEXT: .cfi_def_cfa_offset 96 -+; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill -+; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill -+; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill - ; LA32-NEXT: .cfi_offset 1, -4 - ; LA32-NEXT: .cfi_offset 22, -8 --; LA32-NEXT: move $fp, $a2 --; LA32-NEXT: st.w $zero, $sp, 52 --; LA32-NEXT: ld.w $a2, $a1, 12 --; LA32-NEXT: st.w $a2, $sp, 12 --; LA32-NEXT: ld.w $a2, $a1, 8 --; LA32-NEXT: st.w $a2, $sp, 8 --; LA32-NEXT: ld.w $a2, $a1, 4 --; LA32-NEXT: st.w $a2, $sp, 4 --; LA32-NEXT: ld.w $a1, $a1, 0 --; LA32-NEXT: st.w $a1, $sp, 0 --; LA32-NEXT: ld.w $a1, $a0, 12 --; LA32-NEXT: st.w $a1, $sp, 28 --; LA32-NEXT: ld.w $a1, $a0, 8 --; LA32-NEXT: st.w $a1, $sp, 24 --; LA32-NEXT: ld.w $a1, $a0, 4 --; LA32-NEXT: st.w $a1, $sp, 20 --; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: st.w $a0, $sp, 16 --; LA32-NEXT: addi.w $a0, $sp, 32 --; LA32-NEXT: addi.w $a1, $sp, 16 --; LA32-NEXT: addi.w $a2, $sp, 0 --; LA32-NEXT: addi.w $a3, $sp, 52 --; LA32-NEXT: bl %plt(__muloti4) --; LA32-NEXT: ld.w $a0, $sp, 44 --; LA32-NEXT: st.w $a0, $fp, 12 --; LA32-NEXT: ld.w $a0, $sp, 40 --; LA32-NEXT: st.w $a0, $fp, 8 --; LA32-NEXT: ld.w $a0, $sp, 36 --; LA32-NEXT: st.w $a0, $fp, 4 --; LA32-NEXT: ld.w $a0, $sp, 32 --; LA32-NEXT: st.w $a0, $fp, 0 --; LA32-NEXT: ld.w $a0, $sp, 52 -+; LA32-NEXT: .cfi_offset 23, -12 -+; LA32-NEXT: .cfi_offset 24, -16 -+; LA32-NEXT: .cfi_offset 25, -20 -+; LA32-NEXT: .cfi_offset 26, -24 -+; LA32-NEXT: .cfi_offset 27, -28 -+; LA32-NEXT: .cfi_offset 28, -32 -+; LA32-NEXT: .cfi_offset 29, -36 -+; LA32-NEXT: .cfi_offset 30, -40 -+; LA32-NEXT: .cfi_offset 31, -44 -+; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ld.w $a6, $a1, 0 -+; LA32-NEXT: ld.w $a7, $a0, 0 -+; LA32-NEXT: mulh.wu $a3, $a7, $a6 -+; LA32-NEXT: ld.w $a5, $a0, 4 -+; LA32-NEXT: mul.w $a4, $a5, $a6 -+; LA32-NEXT: add.w $a3, $a4, $a3 -+; LA32-NEXT: sltu $a4, $a3, $a4 -+; LA32-NEXT: mulh.wu $t0, $a5, $a6 -+; LA32-NEXT: add.w $a4, $t0, $a4 -+; LA32-NEXT: ld.w $t0, $a1, 4 -+; LA32-NEXT: mul.w $t1, $a7, $t0 -+; LA32-NEXT: add.w $a3, $t1, $a3 -+; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill -+; LA32-NEXT: sltu $t1, $a3, $t1 -+; LA32-NEXT: mulh.wu $t2, $a7, $t0 -+; LA32-NEXT: add.w $t1, $t2, $t1 -+; LA32-NEXT: ld.w $t4, $a0, 12 -+; LA32-NEXT: ld.w $t2, $a0, 8 -+; LA32-NEXT: ld.w $t3, $a1, 8 -+; LA32-NEXT: mulh.wu $a0, $t2, $t3 -+; LA32-NEXT: mul.w $t5, $t4, $t3 -+; LA32-NEXT: add.w $a0, $t5, $a0 -+; LA32-NEXT: sltu $t5, $a0, $t5 -+; LA32-NEXT: mulh.wu $t6, $t4, $t3 -+; LA32-NEXT: add.w $t5, $t6, $t5 -+; LA32-NEXT: ld.w $t7, $a1, 12 -+; LA32-NEXT: mul.w $a1, $t2, $t7 -+; LA32-NEXT: add.w $a0, $a1, $a0 -+; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a1, $a0, $a1 -+; LA32-NEXT: mulh.wu $t6, $t2, $t7 -+; LA32-NEXT: add.w $t6, $t6, $a1 -+; LA32-NEXT: srai.w $s7, $t4, 31 -+; LA32-NEXT: mul.w $a1, $s7, $t7 -+; LA32-NEXT: mulh.wu $t8, $s7, $t3 -+; LA32-NEXT: add.w $t8, $t8, $a1 -+; LA32-NEXT: mulh.wu $fp, $a6, $s7 -+; LA32-NEXT: mul.w $s6, $t0, $s7 -+; LA32-NEXT: add.w $s8, $s6, $fp -+; LA32-NEXT: mul.w $a1, $a6, $s7 -+; LA32-NEXT: add.w $ra, $a1, $s8 -+; LA32-NEXT: sltu $s0, $ra, $a1 -+; LA32-NEXT: add.w $a0, $fp, $s0 -+; LA32-NEXT: add.w $a3, $a4, $t1 -+; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a4, $a3, $a4 -+; LA32-NEXT: mulh.wu $t1, $a5, $t0 -+; LA32-NEXT: add.w $a3, $t1, $a4 -+; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill -+; LA32-NEXT: srai.w $s4, $t7, 31 -+; LA32-NEXT: mul.w $fp, $a7, $s4 -+; LA32-NEXT: mulh.wu $a4, $a7, $s4 -+; LA32-NEXT: add.w $s1, $a4, $fp -+; LA32-NEXT: sltu $s0, $s1, $fp -+; LA32-NEXT: add.w $s5, $a4, $s0 -+; LA32-NEXT: mul.w $a4, $s7, $t3 -+; LA32-NEXT: add.w $t8, $t8, $a4 -+; LA32-NEXT: add.w $s0, $ra, $t8 -+; LA32-NEXT: add.w $a3, $a1, $a4 -+; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill -+; LA32-NEXT: sltu $a4, $a3, $a1 -+; LA32-NEXT: add.w $a3, $s0, $a4 -+; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill -+; LA32-NEXT: add.w $s3, $t5, $t6 -+; LA32-NEXT: sltu $a4, $s3, $t5 -+; LA32-NEXT: mulh.wu $t5, $t4, $t7 -+; LA32-NEXT: add.w $a3, $t5, $a4 -+; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill -+; LA32-NEXT: mul.w $a4, $a7, $a6 -+; LA32-NEXT: st.w $a4, $a2, 0 -+; LA32-NEXT: sltu $a4, $s8, $s6 -+; LA32-NEXT: mulh.wu $t5, $t0, $s7 -+; LA32-NEXT: add.w $a4, $t5, $a4 -+; LA32-NEXT: add.w $t1, $a4, $a0 -+; LA32-NEXT: sltu $a4, $t1, $a4 -+; LA32-NEXT: add.w $s2, $t5, $a4 -+; LA32-NEXT: mulh.wu $a4, $a7, $t3 -+; LA32-NEXT: mul.w $t5, $a5, $t3 -+; LA32-NEXT: add.w $a4, $t5, $a4 -+; LA32-NEXT: sltu $t5, $a4, $t5 -+; LA32-NEXT: mulh.wu $t6, $a5, $t3 -+; LA32-NEXT: add.w $a3, $t6, $t5 -+; LA32-NEXT: mul.w $t6, $a7, $t7 -+; LA32-NEXT: add.w $t5, $t6, $a4 -+; LA32-NEXT: sltu $a4, $t5, $t6 -+; LA32-NEXT: mulh.wu $t6, $a7, $t7 -+; LA32-NEXT: add.w $a4, $t6, $a4 -+; LA32-NEXT: mulh.wu $t6, $t2, $a6 -+; LA32-NEXT: mul.w $s7, $t4, $a6 -+; LA32-NEXT: add.w $t6, $s7, $t6 -+; LA32-NEXT: sltu $s7, $t6, $s7 -+; LA32-NEXT: mulh.wu $s8, $t4, $a6 -+; LA32-NEXT: add.w $a0, $s8, $s7 -+; LA32-NEXT: mul.w $s7, $t2, $t0 -+; LA32-NEXT: add.w $t6, $s7, $t6 -+; LA32-NEXT: sltu $s7, $t6, $s7 -+; LA32-NEXT: mulh.wu $s8, $t2, $t0 -+; LA32-NEXT: add.w $a2, $s8, $s7 -+; LA32-NEXT: mul.w $s8, $a5, $s4 -+; LA32-NEXT: add.w $s7, $s1, $s8 -+; LA32-NEXT: add.w $s1, $s7, $ra -+; LA32-NEXT: add.w $a1, $fp, $a1 -+; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill -+; LA32-NEXT: sltu $ra, $a1, $fp -+; LA32-NEXT: add.w $a1, $s1, $ra -+; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill -+; LA32-NEXT: xor $s0, $a1, $s7 -+; LA32-NEXT: sltui $s0, $s0, 1 -+; LA32-NEXT: sltu $a1, $a1, $s7 -+; LA32-NEXT: masknez $s1, $a1, $s0 -+; LA32-NEXT: maskeqz $s0, $ra, $s0 -+; LA32-NEXT: add.w $t1, $s6, $t1 -+; LA32-NEXT: sltu $s6, $t1, $s6 -+; LA32-NEXT: add.w $s2, $s2, $s6 -+; LA32-NEXT: add.w $a2, $a0, $a2 -+; LA32-NEXT: sltu $a0, $a2, $a0 -+; LA32-NEXT: mulh.wu $s6, $t4, $t0 -+; LA32-NEXT: add.w $t8, $s6, $a0 -+; LA32-NEXT: add.w $a4, $a3, $a4 -+; LA32-NEXT: sltu $a3, $a4, $a3 -+; LA32-NEXT: mulh.wu $s6, $a5, $t7 -+; LA32-NEXT: add.w $a3, $s6, $a3 -+; LA32-NEXT: mul.w $s6, $t4, $t7 -+; LA32-NEXT: mul.w $t7, $a5, $t7 -+; LA32-NEXT: mul.w $ra, $t4, $t0 -+; LA32-NEXT: mul.w $t0, $a5, $t0 -+; LA32-NEXT: mul.w $t4, $t4, $s4 -+; LA32-NEXT: mul.w $a7, $a7, $t3 -+; LA32-NEXT: mul.w $a6, $t2, $a6 -+; LA32-NEXT: mul.w $t3, $t2, $t3 -+; LA32-NEXT: mul.w $a0, $t2, $s4 -+; LA32-NEXT: mulh.wu $t2, $t2, $s4 -+; LA32-NEXT: mulh.wu $a5, $s4, $a5 -+; LA32-NEXT: sltu $s4, $s7, $s8 -+; LA32-NEXT: add.w $s4, $a5, $s4 -+; LA32-NEXT: add.w $s4, $s5, $s4 -+; LA32-NEXT: sltu $s5, $s4, $s5 -+; LA32-NEXT: add.w $s5, $a5, $s5 -+; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a1, $t0, $a1 -+; LA32-NEXT: sltu $a5, $a1, $t0 -+; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload -+; LA32-NEXT: add.w $t0, $t0, $a5 -+; LA32-NEXT: or $s0, $s0, $s1 -+; LA32-NEXT: add.w $a4, $t7, $a4 -+; LA32-NEXT: sltu $a5, $a4, $t7 -+; LA32-NEXT: add.w $t7, $a3, $a5 -+; LA32-NEXT: add.w $s1, $ra, $a2 -+; LA32-NEXT: sltu $a2, $s1, $ra -+; LA32-NEXT: add.w $t8, $t8, $a2 -+; LA32-NEXT: add.w $a5, $s6, $s3 -+; LA32-NEXT: sltu $a2, $a5, $s6 -+; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a2, $a3, $a2 -+; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload -+; LA32-NEXT: st.w $a3, $s6, 4 -+; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a3, $s2, $a3 -+; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload -+; LA32-NEXT: add.w $s2, $t1, $s2 -+; LA32-NEXT: sltu $t1, $s2, $t1 -+; LA32-NEXT: add.w $a3, $a3, $t1 -+; LA32-NEXT: add.w $t1, $s8, $s4 -+; LA32-NEXT: sltu $s3, $t1, $s8 -+; LA32-NEXT: add.w $s3, $s5, $s3 -+; LA32-NEXT: add.w $t2, $t2, $a0 -+; LA32-NEXT: add.w $t2, $t2, $t4 -+; LA32-NEXT: add.w $t2, $t2, $s7 -+; LA32-NEXT: add.w $t4, $a0, $fp -+; LA32-NEXT: sltu $a0, $t4, $a0 -+; LA32-NEXT: add.w $a0, $t2, $a0 -+; LA32-NEXT: add.w $a0, $s3, $a0 -+; LA32-NEXT: add.w $t2, $t1, $t4 -+; LA32-NEXT: sltu $t1, $t2, $t1 -+; LA32-NEXT: add.w $a0, $a0, $t1 -+; LA32-NEXT: add.w $a0, $a0, $a3 -+; LA32-NEXT: add.w $t1, $t2, $s2 -+; LA32-NEXT: sltu $a3, $t1, $t2 -+; LA32-NEXT: add.w $a0, $a0, $a3 -+; LA32-NEXT: add.w $a3, $t6, $t0 -+; LA32-NEXT: add.w $a1, $a6, $a1 -+; LA32-NEXT: sltu $a6, $a1, $a6 -+; LA32-NEXT: add.w $t0, $a3, $a6 -+; LA32-NEXT: add.w $a1, $a7, $a1 -+; LA32-NEXT: sltu $a7, $a1, $a7 -+; LA32-NEXT: add.w $a3, $t5, $t0 -+; LA32-NEXT: add.w $a3, $a3, $a7 -+; LA32-NEXT: sltu $t2, $a3, $t5 -+; LA32-NEXT: xor $t4, $a3, $t5 -+; LA32-NEXT: sltui $t4, $t4, 1 -+; LA32-NEXT: masknez $t2, $t2, $t4 -+; LA32-NEXT: maskeqz $a7, $a7, $t4 -+; LA32-NEXT: st.w $a1, $s6, 8 -+; LA32-NEXT: or $a1, $a7, $t2 -+; LA32-NEXT: sltu $a7, $t0, $t6 -+; LA32-NEXT: xor $t0, $t0, $t6 -+; LA32-NEXT: sltui $t0, $t0, 1 -+; LA32-NEXT: masknez $a7, $a7, $t0 -+; LA32-NEXT: maskeqz $a6, $a6, $t0 -+; LA32-NEXT: or $a6, $a6, $a7 -+; LA32-NEXT: add.w $a6, $s1, $a6 -+; LA32-NEXT: sltu $a7, $a6, $s1 -+; LA32-NEXT: add.w $a7, $t8, $a7 -+; LA32-NEXT: add.w $a1, $a4, $a1 -+; LA32-NEXT: sltu $a4, $a1, $a4 -+; LA32-NEXT: add.w $a4, $t7, $a4 -+; LA32-NEXT: add.w $t0, $t1, $s0 -+; LA32-NEXT: sltu $t1, $t0, $t1 -+; LA32-NEXT: add.w $a0, $a0, $t1 -+; LA32-NEXT: st.w $a3, $s6, 12 -+; LA32-NEXT: add.w $a1, $a6, $a1 -+; LA32-NEXT: sltu $a6, $a1, $a6 -+; LA32-NEXT: add.w $a4, $a7, $a4 -+; LA32-NEXT: add.w $a4, $a4, $a6 -+; LA32-NEXT: sltu $t1, $a4, $a7 -+; LA32-NEXT: xor $a7, $a4, $a7 -+; LA32-NEXT: sltui $a7, $a7, 1 -+; LA32-NEXT: masknez $t1, $t1, $a7 -+; LA32-NEXT: maskeqz $a6, $a6, $a7 -+; LA32-NEXT: or $a6, $a6, $t1 -+; LA32-NEXT: add.w $a6, $a5, $a6 -+; LA32-NEXT: sltu $a5, $a6, $a5 -+; LA32-NEXT: add.w $a2, $a2, $a5 -+; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a4, $t1, $a4 -+; LA32-NEXT: add.w $a1, $t3, $a1 -+; LA32-NEXT: sltu $a5, $a1, $t3 -+; LA32-NEXT: add.w $a4, $a4, $a5 -+; LA32-NEXT: sltu $a7, $a4, $t1 -+; LA32-NEXT: xor $t1, $a4, $t1 -+; LA32-NEXT: sltui $t1, $t1, 1 -+; LA32-NEXT: masknez $a7, $a7, $t1 -+; LA32-NEXT: maskeqz $a5, $a5, $t1 -+; LA32-NEXT: or $a5, $a5, $a7 -+; LA32-NEXT: add.w $a5, $a6, $a5 -+; LA32-NEXT: sltu $a6, $a5, $a6 -+; LA32-NEXT: add.w $a2, $a2, $a6 -+; LA32-NEXT: add.w $a0, $a2, $a0 -+; LA32-NEXT: add.w $a2, $a5, $t0 -+; LA32-NEXT: sltu $a5, $a2, $a5 -+; LA32-NEXT: add.w $a0, $a0, $a5 -+; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a5, $a1, $a5 -+; LA32-NEXT: sltu $a1, $a5, $a1 -+; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload -+; LA32-NEXT: add.w $a6, $a4, $a6 -+; LA32-NEXT: add.w $a6, $a6, $a1 -+; LA32-NEXT: sltu $a7, $a6, $a4 -+; LA32-NEXT: xor $a4, $a6, $a4 -+; LA32-NEXT: sltui $a4, $a4, 1 -+; LA32-NEXT: masknez $a7, $a7, $a4 -+; LA32-NEXT: maskeqz $a1, $a1, $a4 -+; LA32-NEXT: or $a1, $a1, $a7 -+; LA32-NEXT: add.w $a1, $a2, $a1 -+; LA32-NEXT: sltu $a2, $a1, $a2 -+; LA32-NEXT: add.w $a0, $a0, $a2 -+; LA32-NEXT: srai.w $a2, $a3, 31 -+; LA32-NEXT: xor $a3, $a6, $a2 -+; LA32-NEXT: xor $a0, $a0, $a2 -+; LA32-NEXT: or $a0, $a3, $a0 -+; LA32-NEXT: xor $a3, $a5, $a2 -+; LA32-NEXT: xor $a1, $a1, $a2 -+; LA32-NEXT: or $a1, $a3, $a1 -+; LA32-NEXT: or $a0, $a1, $a0 - ; LA32-NEXT: sltu $a0, $zero, $a0 --; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload --; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload --; LA32-NEXT: addi.w $sp, $sp, 64 -+; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload -+; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 96 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: smuloi128: - ; LA64: # %bb.0: --; LA64-NEXT: addi.d $sp, $sp, -32 --; LA64-NEXT: .cfi_def_cfa_offset 32 --; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill --; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill --; LA64-NEXT: .cfi_offset 1, -8 --; LA64-NEXT: .cfi_offset 22, -16 --; LA64-NEXT: move $fp, $a4 --; LA64-NEXT: st.d $zero, $sp, 8 --; LA64-NEXT: addi.d $a4, $sp, 8 --; LA64-NEXT: bl %plt(__muloti4) --; LA64-NEXT: st.d $a1, $fp, 8 --; LA64-NEXT: st.d $a0, $fp, 0 --; LA64-NEXT: ld.d $a0, $sp, 8 -+; LA64-NEXT: srai.d $a5, $a1, 63 -+; LA64-NEXT: mul.d $a6, $a2, $a5 -+; LA64-NEXT: mulh.du $a7, $a2, $a5 -+; LA64-NEXT: add.d $a7, $a7, $a6 -+; LA64-NEXT: mul.d $a5, $a3, $a5 -+; LA64-NEXT: add.d $a5, $a7, $a5 -+; LA64-NEXT: srai.d $a7, $a3, 63 -+; LA64-NEXT: mul.d $t0, $a7, $a1 -+; LA64-NEXT: mulh.du $t1, $a7, $a0 -+; LA64-NEXT: add.d $t0, $t1, $t0 -+; LA64-NEXT: mul.d $a7, $a7, $a0 -+; LA64-NEXT: add.d $t0, $t0, $a7 -+; LA64-NEXT: add.d $a5, $t0, $a5 -+; LA64-NEXT: mulh.du $t0, $a0, $a2 -+; LA64-NEXT: mul.d $t1, $a1, $a2 -+; LA64-NEXT: add.d $t0, $t1, $t0 -+; LA64-NEXT: sltu $t1, $t0, $t1 -+; LA64-NEXT: mulh.du $t2, $a1, $a2 -+; LA64-NEXT: add.d $t1, $t2, $t1 -+; LA64-NEXT: mul.d $t2, $a0, $a3 -+; LA64-NEXT: add.d $t0, $t2, $t0 -+; LA64-NEXT: sltu $t2, $t0, $t2 -+; LA64-NEXT: mulh.du $t3, $a0, $a3 -+; LA64-NEXT: add.d $t2, $t3, $t2 -+; LA64-NEXT: add.d $a6, $a7, $a6 -+; LA64-NEXT: sltu $a7, $a6, $a7 -+; LA64-NEXT: add.d $a5, $a5, $a7 -+; LA64-NEXT: mul.d $a0, $a0, $a2 -+; LA64-NEXT: mul.d $a2, $a1, $a3 -+; LA64-NEXT: mulh.du $a1, $a1, $a3 -+; LA64-NEXT: add.d $a3, $t1, $t2 -+; LA64-NEXT: sltu $a7, $a3, $t1 -+; LA64-NEXT: add.d $a1, $a1, $a7 -+; LA64-NEXT: st.d $a0, $a4, 0 -+; LA64-NEXT: add.d $a0, $a2, $a3 -+; LA64-NEXT: sltu $a2, $a0, $a2 -+; LA64-NEXT: add.d $a1, $a1, $a2 -+; LA64-NEXT: st.d $t0, $a4, 8 -+; LA64-NEXT: add.d $a1, $a1, $a5 -+; LA64-NEXT: add.d $a2, $a0, $a6 -+; LA64-NEXT: sltu $a0, $a2, $a0 -+; LA64-NEXT: add.d $a0, $a1, $a0 -+; LA64-NEXT: srai.d $a1, $t0, 63 -+; LA64-NEXT: xor $a0, $a0, $a1 -+; LA64-NEXT: xor $a1, $a2, $a1 -+; LA64-NEXT: or $a0, $a1, $a0 - ; LA64-NEXT: sltu $a0, $zero, $a0 --; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload --; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload --; LA64-NEXT: addi.d $sp, $sp, 32 - ; LA64-NEXT: ret - %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) - %val = extractvalue {i128, i1} %t, 0 --- -2.20.1 - - -From 01ced6193e2abfbd50fbd9d40066cf27f9f9067b Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Wed, 29 Nov 2023 15:21:21 +0800 -Subject: [PATCH 12/14] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions - (#73742) - -``` -when a=c=-0.0, b=0.0: --(a * b + (-c)) = -0.0 --a * b + c = 0.0 -(fneg (fma a, b (-c))) != (fma (fneg a), b ,c) -``` - -See https://reviews.llvm.org/D90901 for a similar discussion on X86. - -(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9) ---- - .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +- - .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +- - llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++-- - llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++-- - 4 files changed, 483 insertions(+), 49 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -index 826db54febd3..65120c083f49 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td -@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), - def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), - (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; - --// fnmsub.s: -fj * fk + fa --def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), -+// fnmsub.s: -(fj * fk - fa) -+def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), -+ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; -+ -+// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) -+def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), - (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; - } // Predicates = [HasBasicF] - -diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -index 5118474725b6..437c1e4d7be2 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td -@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), - (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; - - // fnmsub.d: -(fj * fk - fa) --def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), -+def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), -+ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; -+ -+// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) -+def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), - (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; - } // Predicates = [HasBasicD] - -diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll -index 6dd628479433..58d20c62a668 100644 ---- a/llvm/test/CodeGen/LoongArch/double-fma.ll -+++ b/llvm/test/CodeGen/LoongArch/double-fma.ll -@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA32-CONTRACT-ON-LABEL: fnmsub_d: - ; LA32-CONTRACT-ON: # %bb.0: - ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 --; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 - ; LA32-CONTRACT-ON-NEXT: ret - ; - ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: - ; LA32-CONTRACT-OFF: # %bb.0: - ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 --; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 - ; LA32-CONTRACT-OFF-NEXT: ret - ; - ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: -@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-ON-LABEL: fnmsub_d: - ; LA64-CONTRACT-ON: # %bb.0: - ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 --; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 - ; LA64-CONTRACT-ON-NEXT: ret - ; - ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg double %c -+ %mul = fmul double %a, %b -+ %add = fadd double %mul, %negc -+ %neg = fneg double %add -+ ret double %neg -+} -+ -+define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg nsz double %a -+ %mul = fmul nsz double %nega, %b -+ %add = fadd nsz double %mul, %c -+ ret double %add -+} -+ -+;; Check that fnmsub.d is not emitted. -+define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 - ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg double %a -@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg contract double %c -+ %mul = fmul contract double %a, %b -+ %add = fadd contract double %mul, %negc -+ %neg = fneg contract double %add -+ ret double %neg -+} -+ -+define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg contract nsz double %a -+ %mul = fmul contract nsz double %nega, %b -+ %add = fadd contract nsz double %mul, %c -+ ret double %add -+} -+ -+;; Check that fnmsub.d is not emitted. -+define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg contract double %a - %mul = fmul contract double %nega, %b -@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %fma = call double @llvm.fma.f64(double %a, double %b, double %c) -- %neg = fneg double %fma -- ret double %neg -+ %negfma = fneg double %fma -+ ret double %negfma - } - - define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { -@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg double %c -+ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) -+ %negfma = fneg double %fma -+ ret double %negfma -+} -+ -+define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg double %a -- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) -+ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) - ret double %fma - } - --define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { --; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: -+;; Check that fnmsub.d is not emitted. -+define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-FAST: # %bb.0: --; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-FAST-NEXT: ret - ; --; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-ON: # %bb.0: --; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-ON-NEXT: ret - ; --; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: - ; LA32-CONTRACT-OFF: # %bb.0: --; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-OFF-NEXT: ret - ; --; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-FAST: # %bb.0: --; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-FAST-NEXT: ret - ; --; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-ON: # %bb.0: --; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-ON-NEXT: ret - ; --; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: --; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret -- %negb = fneg double %b -- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) -+ %nega = fneg double %a -+ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) - ret double %fma - } - -@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %mul = fmul contract double %a, %b -- %sub = fsub contract double %c, %mul -- ret double %sub -+ %negc = fneg contract double %c -+ %add = fadd contract double %negc, %mul -+ %negadd = fneg contract double %add -+ ret double %negadd - } -diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll -index 54dc56784006..c236255d971a 100644 ---- a/llvm/test/CodeGen/LoongArch/float-fma.ll -+++ b/llvm/test/CodeGen/LoongArch/float-fma.ll -@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA32-CONTRACT-ON-LABEL: fnmsub_s: - ; LA32-CONTRACT-ON: # %bb.0: - ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 --; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 - ; LA32-CONTRACT-ON-NEXT: ret - ; - ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: - ; LA32-CONTRACT-OFF: # %bb.0: - ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 --; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 - ; LA32-CONTRACT-OFF-NEXT: ret - ; - ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: -@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-ON-LABEL: fnmsub_s: - ; LA64-CONTRACT-ON: # %bb.0: - ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 --; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 - ; LA64-CONTRACT-ON-NEXT: ret - ; - ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg float %c -+ %mul = fmul float %a, %b -+ %add = fadd float %mul, %negc -+ %neg = fneg float %add -+ ret float %neg -+} -+ -+define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg nsz float %a -+ %mul = fmul nsz float %nega, %b -+ %add = fadd nsz float %mul, %c -+ ret float %add -+} -+ -+;; Check that fnmsub.s is not emitted. -+define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -+; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 - ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg float %a -@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg contract float %c -+ %mul = fmul contract float %a, %b -+ %add = fadd contract float %mul, %negc -+ %neg = fneg contract float %add -+ ret float %neg -+} -+ -+define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %nega = fneg contract nsz float %a -+ %mul = fmul contract nsz float %nega, %b -+ %add = fadd contract nsz float %mul, %c -+ ret float %add -+} -+ -+;; Check that fnmsub.s is not emitted. -+define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg contract float %a - %mul = fmul contract float %nega, %b -@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %fma = call float @llvm.fma.f64(float %a, float %b, float %c) -- %neg = fneg float %fma -- ret float %neg -+ %negfma = fneg float %fma -+ ret float %negfma - } - - define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { -@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-OFF-NEXT: ret -+ %negc = fneg float %c -+ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) -+ %negfma = fneg float %fma -+ ret float %negfma -+} -+ -+define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-FAST: # %bb.0: -+; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-FAST-NEXT: ret -+; -+; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-ON: # %bb.0: -+; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-ON-NEXT: ret -+; -+; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: -+; LA32-CONTRACT-OFF: # %bb.0: -+; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA32-CONTRACT-OFF-NEXT: ret -+; -+; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-FAST: # %bb.0: -+; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-FAST-NEXT: ret -+; -+; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-ON: # %bb.0: -+; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 -+; LA64-CONTRACT-ON-NEXT: ret -+; -+; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: -+; LA64-CONTRACT-OFF: # %bb.0: -+; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %nega = fneg float %a -- %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) -+ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) - ret float %fma - } - --define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { --; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: -+;; Check that fnmsub.s is not emitted. -+define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { -+; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-FAST: # %bb.0: --; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-FAST-NEXT: ret - ; --; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: -+; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-ON: # %bb.0: --; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-ON-NEXT: ret - ; --; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: -+; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: - ; LA32-CONTRACT-OFF: # %bb.0: --; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA32-CONTRACT-OFF-NEXT: ret - ; --; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-FAST: # %bb.0: --; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-FAST-NEXT: ret - ; --; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-ON: # %bb.0: --; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-ON-NEXT: ret - ; --; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: -+; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: - ; LA64-CONTRACT-OFF: # %bb.0: --; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 -+; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 -+; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret -- %negb = fneg float %b -- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) -+ %nega = fneg float %a -+ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) - ret float %fma - } - -@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { - ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 - ; LA64-CONTRACT-OFF-NEXT: ret - %mul = fmul contract float %a, %b -- %sub = fsub contract float %c, %mul -- ret float %sub -+ %negc = fneg contract float %c -+ %add = fadd contract float %negc, %mul -+ %negadd = fneg contract float %add -+ ret float %negadd - } --- -2.20.1 - - -From 7a3bd125d9c1d0265b265ce238a88d0d4550e5a0 Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Wed, 3 Jan 2024 13:59:12 +0800 -Subject: [PATCH 13/14] [LoongArch] Fix the procossor series mask - -Refer PRID_SERIES_MASK definition in linux kernel: -arch/loongarch/include/asm/cpu.h. - -(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0) ---- - llvm/lib/TargetParser/Host.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp -index 81309280a44b..d11dc605e188 100644 ---- a/llvm/lib/TargetParser/Host.cpp -+++ b/llvm/lib/TargetParser/Host.cpp -@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { - // Use processor id to detect cpu name. - uint32_t processor_id; - __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); -- switch (processor_id & 0xff00) { -+ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. -+ switch (processor_id & 0xf000) { - case 0xc000: // Loongson 64bit, 4-issue - return "la464"; - // TODO: Others. --- -2.20.1 - - -From 3634ac4cbc475509c46521f5b8a3fcbeca6d06c7 Mon Sep 17 00:00:00 2001 -From: wanglei -Date: Mon, 11 Mar 2024 08:59:17 +0800 -Subject: [PATCH 14/14] [LoongArch] Make sure that the LoongArchISD::BSTRINS - node uses the correct `MSB` value (#84454) - -The `MSB` must not be greater than `GRLen`. Without this patch, newly -added test cases will crash with LoongArch32, resulting in a 'cannot -select' error. - -(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b) -(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626) ---- - llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++- - llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++ - 2 files changed, 16 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index ed106cb766bc..5affaf37ad5a 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -2310,7 +2310,9 @@ Retry: - return DAG.getNode( - LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), - DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), -- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), -+ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) -+ : (MaskIdx0 + MaskLen0 - 1), -+ DL, GRLenVT), - DAG.getConstant(MaskIdx0, DL, GRLenVT)); - } - -diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll -index dfbe000841cd..e008caacad2a 100644 ---- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll -+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll -@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { - ret i32 %or - } - -+;; The high bits of `const` are zero. -+define i32 @pat5_high_zeros(i32 %a) nounwind { -+; CHECK-LABEL: pat5_high_zeros: -+; CHECK: # %bb.0: -+; CHECK-NEXT: lu12i.w $a1, 1 -+; CHECK-NEXT: ori $a1, $a1, 564 -+; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 -+; CHECK-NEXT: ret -+ %and = and i32 %a, 65535 ; 0x0000ffff -+ %or = or i32 %and, 305397760 ; 0x12340000 -+ ret i32 %or -+} -+ - ;; Pattern 6: a = b | ((c & mask) << shamt) - ;; In this testcase b is 0x10000002, but in fact we do not require b being a - ;; constant. As long as all positions in b to be overwritten by the incoming --- -2.20.1 - diff --git a/0013-Backport-Bitcode-Add-some-missing-GetTypeByID-failure-checks.patch b/0013-Backport-Bitcode-Add-some-missing-GetTypeByID-failure-checks.patch deleted file mode 100644 index 0d365dbb2013e578aa6441e0656b00f4d626c0a1..0000000000000000000000000000000000000000 --- a/0013-Backport-Bitcode-Add-some-missing-GetTypeByID-failure-checks.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 58f4b115f97435efa1df1baa3247791a13444c00 Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Tue, 26 Sep 2023 16:51:40 +0200 -Subject: [PATCH 1/3] [Bitcode] Add some missing GetTypeByID failure checks - -Print an error instead of crashing. - -Fixes https://github.com/llvm/llvm-project/issues/67388. ---- - llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -index 0a9a80688a41..2f024899b451 100644 ---- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -@@ -1315,7 +1315,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - - unsigned TyID = Record[0]; - Type *Ty = Callbacks.GetTypeByID(TyID); -- if (Ty->isMetadataTy() || Ty->isVoidTy()) { -+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy()) { - dropRecord(); - break; - } -@@ -1366,7 +1366,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - - unsigned TyID = Record[0]; - Type *Ty = Callbacks.GetTypeByID(TyID); -- if (Ty->isMetadataTy() || Ty->isVoidTy()) -+ if (!Ty || Ty->isMetadataTy() || Ty->isVoidTy()) - return error("Invalid record"); - - Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, --- -2.33.0 - diff --git a/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch b/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch deleted file mode 100644 index c43bbd344dbd3196759baba43c44351fcac86c8b..0000000000000000000000000000000000000000 --- a/0014-Backport-X86-Inline-Skip-inline-asm-in-inlining-targ.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 678cf3a36644847cac4b0be2d919aba77416088a Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Mon, 04 Mar 2024 07:00:37 +0800 -Subject: [PATCH] [Backport][X86][Inline] Skip inline asm in inlining target - feature check - -When inlining across functions with different target features, we -perform roughly two checks: -1. The caller features must be a superset of the callee features. -2. Calls in the callee cannot use types where the target features would -change the call ABI (e.g. by changing whether something is passed in a -zmm or two ymm registers). The latter check is very crude right now. - -The latter check currently also catches inline asm "calls". I believe -that inline asm should be excluded from this check, as it is independent -from the usual call ABI, and instead governed by the inline asm -constraint string. ---- - .../lib/Target/X86/X86TargetTransformInfo.cpp | 4 +++ - .../Inline/X86/call-abi-compatibility.ll | 26 +++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp -index 129a2646d..9c7954230 100644 ---- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp -+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp -@@ -6046,6 +6046,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, - - for (const Instruction &I : instructions(Callee)) { - if (const auto *CB = dyn_cast(&I)) { -+ // Having more target features is fine for inline ASM. -+ if (CB->isInlineAsm()) -+ continue; -+ - SmallVector Types; - for (Value *Arg : CB->args()) - Types.push_back(Arg->getType()); -diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll -index 3a30980fe..6f582cab2 100644 ---- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll -+++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll -@@ -93,3 +93,29 @@ define internal void @caller_not_avx4() { - } - - declare i64 @caller_unknown_simple(i64) -+ -+; This call should get inlined, because the callee only contains -+; inline ASM, not real calls. -+define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 { -+; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm -+; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] { -+; CHECK-NEXT: [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64 -+; CHECK-NEXT: [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64 -+; CHECK-NEXT: [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64 -+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]]) -+; CHECK-NEXT: ret <8 x i64> [[TMP1]] -+; -+ %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) -+ ret <8 x i64> %call -+} -+ -+define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 { -+ %src = load <8 x i64>, ptr %p0, align 64 -+ %a = load <8 x i64>, ptr %p1, align 64 -+ %b = load <8 x i64>, ptr %p2, align 64 -+ %1 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2 -+ ret <8 x i64> %1 -+} -+ -+attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } -+attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } --- -2.33.0 - diff --git a/0015-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch b/0015-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch deleted file mode 100644 index 4528f50f5eed02080e361a10047266e91558a91a..0000000000000000000000000000000000000000 --- a/0015-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 4aec2da60ce3f639e31d81406c09d5c88b3b8f53 Mon Sep 17 00:00:00 2001 -From: Florian Hahn -Date: Wed, 20 Dec 2023 16:56:15 +0100 -Subject: [PATCH 2/3] [ARM] Check all terms in emitPopInst when clearing - Restored for LR. (#75527) - -emitPopInst checks a single function exit MBB. If other paths also exit -the function and any of there terminators uses LR implicitly, it is not -save to clear the Restored bit. - -Check all terminators for the function before clearing Restored. - -This fixes a mis-compile in outlined-fn-may-clobber-lr-in-caller.ll -where the machine-outliner previously introduced BLs that clobbered LR -which in turn is used by the tail call return. - -Alternative to #73553 ---- - llvm/lib/Target/ARM/ARMFrameLowering.cpp | 30 +++++++++++++++++++++--- - llvm/lib/Target/ARM/ARMFrameLowering.h | 3 +++ - 2 files changed, 30 insertions(+), 3 deletions(-) - -diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp -index 4496d4928ebe..650f4650eef0 100644 ---- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp -+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp -@@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, - // Fold the return instruction into the LDM. - DeleteRet = true; - LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; -- // We 'restore' LR into PC so it is not live out of the return block: -- // Clear Restored bit. -- Info.setRestored(false); - } - - // If NoGap is true, pop consecutive registers and then leave the rest -@@ -2769,6 +2766,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, - AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); - } - -+void ARMFrameLowering::processFunctionBeforeFrameFinalized( -+ MachineFunction &MF, RegScavenger *RS) const { -+ TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); -+ -+ MachineFrameInfo &MFI = MF.getFrameInfo(); -+ if (!MFI.isCalleeSavedInfoValid()) -+ return; -+ -+ // Check if all terminators do not implicitly use LR. Then we can 'restore' LR -+ // into PC so it is not live out of the return block: Clear the Restored bit -+ // in that case. -+ for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { -+ if (Info.getReg() != ARM::LR) -+ continue; -+ if (all_of(MF, [](const MachineBasicBlock &MBB) { -+ return all_of(MBB.terminators(), [](const MachineInstr &Term) { -+ return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || -+ Term.getOpcode() == ARM::t2LDMIA_RET || -+ Term.getOpcode() == ARM::tPOP_RET; -+ }); -+ })) { -+ Info.setRestored(false); -+ break; -+ } -+ } -+} -+ - void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, - BitVector &SavedRegs) const { - TargetFrameLowering::getCalleeSaves(MF, SavedRegs); -diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h -index 16f2ce6bea6f..8d2b8beb9a58 100644 ---- a/llvm/lib/Target/ARM/ARMFrameLowering.h -+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h -@@ -59,6 +59,9 @@ public: - void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, - RegScavenger *RS) const override; - -+ void processFunctionBeforeFrameFinalized( -+ MachineFunction &MF, RegScavenger *RS = nullptr) const override; -+ - void adjustForSegmentedStacks(MachineFunction &MF, - MachineBasicBlock &MBB) const override; - --- -2.33.0 - diff --git a/0016-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch b/0016-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch deleted file mode 100644 index eb34372c85aefb4a8e2d514f926adad455483eb9..0000000000000000000000000000000000000000 --- a/0016-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 369bfc8ea8c0a9da51b4bd964f0045cb389c3c2f Mon Sep 17 00:00:00 2001 -From: ostannard -Date: Mon, 26 Feb 2024 12:23:25 +0000 -Subject: [PATCH 3/3] [ARM] Update IsRestored for LR based on all returns - (#82745) - -PR #75527 fixed ARMFrameLowering to set the IsRestored flag for LR based -on all of the return instructions in the function, not just one. -However, there is also code in ARMLoadStoreOptimizer which changes -return instructions, but it set IsRestored based on the one instruction -it changed, not the whole function. - -The fix is to factor out the code added in #75527, and also call it from -ARMLoadStoreOptimizer if it made a change to return instructions. - -Fixes #80287. ---- - llvm/lib/Target/ARM/ARMFrameLowering.cpp | 11 +++++---- - llvm/lib/Target/ARM/ARMFrameLowering.h | 4 ++++ - llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 23 ++++++++----------- - 3 files changed, 21 insertions(+), 17 deletions(-) - -diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp -index 650f4650eef0..008ba4e5924b 100644 ---- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp -+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp -@@ -2766,10 +2766,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, - AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); - } - --void ARMFrameLowering::processFunctionBeforeFrameFinalized( -- MachineFunction &MF, RegScavenger *RS) const { -- TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); -- -+void ARMFrameLowering::updateLRRestored(MachineFunction &MF) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - if (!MFI.isCalleeSavedInfoValid()) - return; -@@ -2793,6 +2790,12 @@ void ARMFrameLowering::processFunctionBeforeFrameFinalized( - } - } - -+void ARMFrameLowering::processFunctionBeforeFrameFinalized( -+ MachineFunction &MF, RegScavenger *RS) const { -+ TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); -+ updateLRRestored(MF); -+} -+ - void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, - BitVector &SavedRegs) const { - TargetFrameLowering::getCalleeSaves(MF, SavedRegs); -diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h -index 8d2b8beb9a58..3c7358d8cd53 100644 ---- a/llvm/lib/Target/ARM/ARMFrameLowering.h -+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h -@@ -59,6 +59,10 @@ public: - void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, - RegScavenger *RS) const override; - -+ /// Update the IsRestored flag on LR if it is spilled, based on the return -+ /// instructions. -+ static void updateLRRestored(MachineFunction &MF); -+ - void processFunctionBeforeFrameFinalized( - MachineFunction &MF, RegScavenger *RS = nullptr) const override; - -diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp -index 93db983b92c0..37d9e1addd1e 100644 ---- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp -+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp -@@ -2062,17 +2062,6 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { - MO.setReg(ARM::PC); - PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); - MBB.erase(MBBI); -- // We now restore LR into PC so it is not live-out of the return block -- // anymore: Clear the CSI Restored bit. -- MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); -- // CSI should be fixed after PrologEpilog Insertion -- assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); -- for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { -- if (Info.getReg() == ARM::LR) { -- Info.setRestored(false); -- break; -- } -- } - return true; - } - } -@@ -2120,14 +2109,22 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - isThumb2 = AFI->isThumb2Function(); - isThumb1 = AFI->isThumbFunction() && !isThumb2; - -- bool Modified = false; -+ bool Modified = false, ModifiedLDMReturn = false; - for (MachineBasicBlock &MBB : Fn) { - Modified |= LoadStoreMultipleOpti(MBB); - if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) -- Modified |= MergeReturnIntoLDM(MBB); -+ ModifiedLDMReturn |= MergeReturnIntoLDM(MBB); - if (isThumb1) - Modified |= CombineMovBx(MBB); - } -+ Modified |= ModifiedLDMReturn; -+ -+ // If we merged a BX instruction into an LDM, we need to re-calculate whether -+ // LR is restored. This check needs to consider the whole function, not just -+ // the instruction(s) we changed, because there may be other BX returns which -+ // still need LR to be restored. -+ if (ModifiedLDMReturn) -+ ARMFrameLowering::updateLRRestored(Fn); - - Allocator.DestroyAll(); - return Modified; --- -2.33.0 - diff --git a/0017-Add-the-support-for-classic-flang.patch b/0017-Add-the-support-for-classic-flang.patch deleted file mode 100644 index 74bcbc85bfc64c610d63cd5afa47bb5235f4816b..0000000000000000000000000000000000000000 --- a/0017-Add-the-support-for-classic-flang.patch +++ /dev/null @@ -1,1776 +0,0 @@ -From b297f30783da0dfb3098fe6d39b209caacd45691 Mon Sep 17 00:00:00 2001 -From: luofeng14 -Date: Fri, 1 Mar 2024 18:39:52 +0800 -Subject: [PATCH] sync classic flang patch - ---- - llvm/cmake/modules/HandleLLVMOptions.cmake | 8 + - llvm/include/llvm-c/DebugInfo.h | 5 +- - .../include/llvm/Analysis/TargetLibraryInfo.h | 6 +- - llvm/include/llvm/Analysis/VecFuncs.def | 592 ++++++++++++++++++ - llvm/include/llvm/IR/DIBuilder.h | 28 +- - llvm/include/llvm/IR/DebugInfoMetadata.h | 32 +- - llvm/lib/Analysis/TargetLibraryInfo.cpp | 38 +- - llvm/lib/AsmParser/LLParser.cpp | 22 +- - llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 42 +- - llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 3 +- - llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 54 ++ - .../lib/CodeGen/AsmPrinter/DebugLocStream.cpp | 5 + - llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 6 + - .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 + - llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 4 + - llvm/lib/IR/AsmWriter.cpp | 1 + - llvm/lib/IR/DIBuilder.cpp | 12 +- - llvm/lib/IR/DebugInfo.cpp | 10 +- - llvm/lib/IR/DebugInfoMetadata.cpp | 12 +- - llvm/lib/IR/LLVMContextImpl.h | 11 +- - .../Instrumentation/InstrProfiling.cpp | 4 +- - .../invalid-diglobalvariable-empty-name.ll | 1 + - .../Generic/fortran-subprogram-at.ll | 24 + - .../DebugInfo/Generic/more-subprogram-attr.ll | 38 ++ - llvm/test/DebugInfo/X86/DICommonBlock.ll | 36 ++ - llvm/test/lit.cfg.py | 3 + - llvm/test/lit.site.cfg.py.in | 1 + - llvm/tools/llvm-c-test/debuginfo.c | 4 +- - llvm/unittests/IR/MetadataTest.cpp | 47 +- - llvm/utils/lit/lit/TestingConfig.py | 1 + - llvm/utils/lit/lit/llvm/config.py | 10 + - 31 files changed, 983 insertions(+), 80 deletions(-) - create mode 100644 llvm/test/DebugInfo/Generic/fortran-subprogram-at.ll - create mode 100644 llvm/test/DebugInfo/Generic/more-subprogram-attr.ll - create mode 100644 llvm/test/DebugInfo/X86/DICommonBlock.ll - -diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake -index 76723be696e5..492ea25b179b 100644 ---- a/llvm/cmake/modules/HandleLLVMOptions.cmake -+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake -@@ -89,6 +89,14 @@ if( LLVM_ENABLE_ASSERTIONS ) - add_compile_definitions(_LIBCPP_ENABLE_HARDENED_MODE) - endif() - -+option(LLVM_ENABLE_CLASSIC_FLANG "Build support for classic Flang instead of the new built-in Flang" OFF) -+if(LLVM_ENABLE_CLASSIC_FLANG) -+ set(LLVM_ENABLE_CLASSIC_FLANG 1) -+ add_definitions( -DENABLE_CLASSIC_FLANG ) -+else() -+ set(LLVM_ENABLE_CLASSIC_FLANG 0) -+endif() -+ - if(LLVM_ENABLE_EXPENSIVE_CHECKS) - add_compile_definitions(EXPENSIVE_CHECKS) - -diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h -index 5924294708cc..09d584c24711 100644 ---- a/llvm/include/llvm-c/DebugInfo.h -+++ b/llvm/include/llvm-c/DebugInfo.h -@@ -1148,7 +1148,8 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits); -+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, LLVMDIFlags Flags, -+ uint32_t AlignInBits); - - - /** -@@ -1246,7 +1247,7 @@ LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Decl, uint32_t AlignInBits); -+ LLVMMetadataRef Decl, LLVMDIFlags Flags, uint32_t AlignInBits); - - /** - * Insert a new llvm.dbg.declare intrinsic call before the given instruction. -diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h -index 5d62e837c1f3..490252cd018a 100644 ---- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h -+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h -@@ -22,7 +22,6 @@ namespace llvm { - template class ArrayRef; - class Function; - class Module; --class Triple; - - /// Describes a possible vectorization of a function. - /// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized -@@ -81,6 +80,8 @@ class TargetLibraryInfoImpl { - bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, - const Module &M) const; - -+ Triple T; -+ - public: - /// List of known vector-functions libraries. - /// -@@ -95,6 +96,9 @@ public: - DarwinLibSystemM, // Use Darwin's libsystem_m. - LIBMVEC_X86, // GLIBC Vector Math library. - MASSV, // IBM MASS vector library. -+#ifdef ENABLE_CLASSIC_FLANG -+ PGMATH, // PGI math library. -+#endif - SVML, // Intel short vector math library. - SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions. - ArmPL // Arm Performance Libraries. -diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def -index b884c1e3911e..d1712d158423 100644 ---- a/llvm/include/llvm/Analysis/VecFuncs.def -+++ b/llvm/include/llvm/Analysis/VecFuncs.def -@@ -909,6 +909,596 @@ TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK) - TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED) - TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED) - -+#elif defined(TLI_DEFINE_PGMATH_AARCH64_VECFUNCS) -+// Classic flang libpgmath library's Vector Functions for AArch64 -+ -+TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_sincos_1", "__fd_sincos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_sincos_1", "__fs_sincos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_sincos_1", "__pd_sincos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_sincos_1", "__ps_sincos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_sincos_1", "__rd_sincos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_sincos_1", "__rs_sincos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__fd_exp_1", "__fd_exp_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_4", FIXED(4)) -+ -+TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4)) -+ -+#elif defined(TLI_DEFINE_PGMATH_X86_VECFUNCS) -+// Classic flang libpgmath library's Vector Functions for X86 -+ -+TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_16", FIXED(16)) -+ -+TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_2", FIXED(2)) -+TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_8", FIXED(8)) -+ -+TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4)) -+TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_8", FIXED(8)) -+TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_16", FIXED(16)) -+ - #else - #error "Must choose which vector library functions are to be defined." - #endif -@@ -929,3 +1519,5 @@ TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED) - #undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS - #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES - #undef TLI_DEFINE_ARMPL_VECFUNCS -+#undef TLI_DEFINE_PGMATH_AARCH64_VECFUNCS -+#undef TLI_DEFINE_PGMATH_X86_VECFUNCS -diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h -index ecd6dd7b0a4f..e0e64c662f18 100644 ---- a/llvm/include/llvm/IR/DIBuilder.h -+++ b/llvm/include/llvm/IR/DIBuilder.h -@@ -681,15 +681,22 @@ namespace llvm { - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, - unsigned LineNo, DIType *Ty, bool IsLocalToUnit, bool isDefined = true, - DIExpression *Expr = nullptr, MDNode *Decl = nullptr, -- MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0, -- DINodeArray Annotations = nullptr); -+ MDTuple *TemplateParams = nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::DIFlags Flags = DINode::FlagZero, -+#endif -+ uint32_t AlignInBits = 0, DINodeArray Annotations = nullptr); - - /// Identical to createGlobalVariable - /// except that the resulting DbgNode is temporary and meant to be RAUWed. - DIGlobalVariable *createTempGlobalVariableFwdDecl( - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, -- unsigned LineNo, DIType *Ty, bool IsLocalToUnit, MDNode *Decl = nullptr, -- MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0); -+ unsigned LineNo, DIType *Ty, bool isLocalToUnit, MDNode *Decl = nullptr, -+ MDTuple *TemplateParams = nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::DIFlags Flags = DINode::FlagZero, -+#endif -+ uint32_t AlignInBits = 0); - - /// Create a new descriptor for an auto variable. This is a local variable - /// that is not a subprogram parameter. -@@ -820,6 +827,19 @@ namespace llvm { - StringRef Name, DIFile *File, - unsigned LineNo); - -+#ifdef ENABLE_CLASSIC_FLANG -+ /// Create common block entry for a Fortran common block -+ /// \param Scope Scope of this common block -+ /// \param Name The name of this common block -+ /// \param File The file this common block is defined -+ /// \param LineNo Line number -+ /// \param VarList List of variables that a located in common block -+ /// \param AlignInBits Common block alignment -+ DICommonBlock *createCommonBlock(DIScope *Scope, DIGlobalVariable *decl, -+ StringRef Name, DIFile *File, -+ unsigned LineNo, uint32_t AlignInBits = 0); -+ -+#endif - /// This creates new descriptor for a namespace with the specified - /// parent scope. - /// \param Scope Namespace scope -diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h -index 656122405209..9bd86172a4c0 100644 ---- a/llvm/include/llvm/IR/DebugInfoMetadata.h -+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h -@@ -3062,12 +3062,14 @@ class DIGlobalVariable : public DIVariable { - - bool IsLocalToUnit; - bool IsDefinition; -+ DIFlags Flags; - - DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line, -- bool IsLocalToUnit, bool IsDefinition, uint32_t AlignInBits, -- ArrayRef Ops) -+ bool IsLocalToUnit, bool IsDefinition, DIFlags Flags, -+ uint32_t AlignInBits, ArrayRef Ops) - : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops, AlignInBits), -- IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {} -+ IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), -+ Flags(Flags) {} - ~DIGlobalVariable() = default; - - static DIGlobalVariable * -@@ -3075,12 +3077,12 @@ class DIGlobalVariable : public DIVariable { - StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type, - bool IsLocalToUnit, bool IsDefinition, - DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, -- uint32_t AlignInBits, DINodeArray Annotations, StorageType Storage, -- bool ShouldCreate = true) { -+ DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations, -+ StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Scope, getCanonicalMDString(Context, Name), - getCanonicalMDString(Context, LinkageName), File, Line, Type, - IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration, -- cast_or_null(TemplateParams), AlignInBits, -+ cast_or_null(TemplateParams), Flags, AlignInBits, - Annotations.get(), Storage, ShouldCreate); - } - static DIGlobalVariable * -@@ -3088,14 +3090,14 @@ class DIGlobalVariable : public DIVariable { - MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, - bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -- uint32_t AlignInBits, Metadata *Annotations, StorageType Storage, -- bool ShouldCreate = true); -+ DIFlags Flags, uint32_t AlignInBits, Metadata *Annotations, -+ StorageType Storage, bool ShouldCreate = true); - - TempDIGlobalVariable cloneImpl() const { - return getTemporary(getContext(), getScope(), getName(), getLinkageName(), - getFile(), getLine(), getType(), isLocalToUnit(), - isDefinition(), getStaticDataMemberDeclaration(), -- getTemplateParams(), getAlignInBits(), -+ getTemplateParams(), getFlags(), getAlignInBits(), - getAnnotations()); - } - -@@ -3105,22 +3107,26 @@ public: - (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File, - unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition, - DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, -- uint32_t AlignInBits, DINodeArray Annotations), -+ DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations)) -+ StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -+ Annotations)) - DEFINE_MDNODE_GET( - DIGlobalVariable, - (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File, - unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -- uint32_t AlignInBits, Metadata *Annotations), -+ DIFlags Flags, uint32_t AlignInBits, Metadata *Annotations), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations)) -+ StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -+ Annotations)) - - TempDIGlobalVariable clone() const { return cloneImpl(); } - - bool isLocalToUnit() const { return IsLocalToUnit; } - bool isDefinition() const { return IsDefinition; } -+ DIFlags getFlags() const { return Flags; } -+ bool isArtificial() const { return getFlags() & FlagArtificial; } - StringRef getDisplayName() const { return getStringOperand(4); } - StringRef getLinkageName() const { return getStringOperand(5); } - DIDerivedType *getStaticDataMemberDeclaration() const { -diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp -index 05fa67d0bbf1..a6593f6b3757 100644 ---- a/llvm/lib/Analysis/TargetLibraryInfo.cpp -+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp -@@ -30,6 +30,10 @@ static cl::opt ClVectorLibrary( - "GLIBC Vector Math library"), - clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", - "IBM MASS vector library"), -+#ifdef ENABLE_CLASSIC_FLANG -+ clEnumValN(TargetLibraryInfoImpl::PGMATH, "PGMATH", -+ "PGI math library"), -+#endif - clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", - "Intel SVML library"), - clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi", -@@ -867,14 +871,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T); - } - --TargetLibraryInfoImpl::TargetLibraryInfoImpl() { -+TargetLibraryInfoImpl::TargetLibraryInfoImpl() : T(Triple()) { - // Default to everything being available. - memset(AvailableArray, -1, sizeof(AvailableArray)); - -- initialize(*this, Triple(), StandardNames); -+ initialize(*this, T, StandardNames); - } - --TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { -+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) : T(T) { - // Default to everything being available. - memset(AvailableArray, -1, sizeof(AvailableArray)); - -@@ -886,7 +890,7 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) - ShouldExtI32Return(TLI.ShouldExtI32Return), - ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), - ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), -- SizeOfInt(TLI.SizeOfInt) { -+ SizeOfInt(TLI.SizeOfInt), T(TLI.T) { - memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); - VectorDescs = TLI.VectorDescs; - ScalarDescs = TLI.ScalarDescs; -@@ -898,7 +902,7 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) - ShouldExtI32Return(TLI.ShouldExtI32Return), - ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), - ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), -- SizeOfInt(TLI.SizeOfInt) { -+ SizeOfInt(TLI.SizeOfInt), T(TLI.T) { - std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), - AvailableArray); - VectorDescs = TLI.VectorDescs; -@@ -912,6 +916,7 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoI - ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; - ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; - SizeOfInt = TLI.SizeOfInt; -+ T = TLI.T; - memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); - return *this; - } -@@ -923,6 +928,7 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl && - ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; - ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; - SizeOfInt = TLI.SizeOfInt; -+ T = TLI.T; - std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), - AvailableArray); - return *this; -@@ -1234,6 +1240,28 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( - } - break; - } -+#ifdef ENABLE_CLASSIC_FLANG -+ // NOTE: All routines listed here are not available on all the architectures. -+ // Based on the size of vector registers available and the size of data, the -+ // vector width should be chosen correctly. -+ case PGMATH: { -+ if (T.getArch() == Triple::aarch64) { -+ const VecDesc VecFuncs[] = { -+ #define TLI_DEFINE_PGMATH_AARCH64_VECFUNCS -+ #include "llvm/Analysis/VecFuncs.def" -+ }; -+ addVectorizableFunctions(VecFuncs); -+ } else if (T.getArch() == Triple::x86_64) { -+ const VecDesc VecFuncs[] = { -+ #define TLI_DEFINE_PGMATH_X86_VECFUNCS -+ #include "llvm/Analysis/VecFuncs.def" -+ }; -+ addVectorizableFunctions(VecFuncs); -+ } -+ break; -+ } -+#endif -+ - case NoLibrary: - break; - } -diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp -index 5f0d1a76de79..d7eb34e3d148 100644 ---- a/llvm/lib/AsmParser/LLParser.cpp -+++ b/llvm/lib/AsmParser/LLParser.cpp -@@ -5405,6 +5405,22 @@ bool LLParser::parseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) { - /// isDefinition: true, templateParams: !3, - /// declaration: !4, align: 8) - bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { -+#ifdef ENABLE_CLASSIC_FLANG -+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ -+ OPTIONAL(name, MDStringField, (/* AllowEmpty */ true)); \ -+ OPTIONAL(scope, MDField, ); \ -+ OPTIONAL(linkageName, MDStringField, ); \ -+ OPTIONAL(file, MDField, ); \ -+ OPTIONAL(line, LineField, ); \ -+ OPTIONAL(type, MDField, ); \ -+ OPTIONAL(isLocal, MDBoolField, ); \ -+ OPTIONAL(isDefinition, MDBoolField, (true)); \ -+ OPTIONAL(templateParams, MDField, ); \ -+ OPTIONAL(declaration, MDField, ); \ -+ OPTIONAL(flags, DIFlagField, ); \ -+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ -+ OPTIONAL(annotations, MDField, ); -+#else - #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - OPTIONAL(name, MDStringField, (/* AllowEmpty */ false)); \ - OPTIONAL(scope, MDField, ); \ -@@ -5416,8 +5432,10 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { - OPTIONAL(isDefinition, MDBoolField, (true)); \ - OPTIONAL(templateParams, MDField, ); \ - OPTIONAL(declaration, MDField, ); \ -+ OPTIONAL(flags, DIFlagField, ); \ - OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ - OPTIONAL(annotations, MDField, ); -+#endif - PARSE_MD_FIELDS(); - #undef VISIT_MD_FIELDS - -@@ -5425,8 +5443,8 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { - GET_OR_DISTINCT(DIGlobalVariable, - (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, -- declaration.Val, templateParams.Val, align.Val, -- annotations.Val)); -+ declaration.Val, templateParams.Val, flags.Val, -+ align.Val, annotations.Val)); - return false; - } - -diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -index 0a9a80688a41..c21e5e5dba97 100644 ---- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -@@ -1979,25 +1979,43 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - break; - } - case bitc::METADATA_GLOBAL_VAR: { -- if (Record.size() < 11 || Record.size() > 13) -+ if (Record.size() < 11 || Record.size() > 14) - return error("Invalid record"); - - IsDistinct = Record[0] & 1; - unsigned Version = Record[0] >> 1; - -- if (Version == 2) { -+ if (Version == 3) { -+ // Add support for DIFlags -+ Metadata *Annotations = nullptr; -+ if (Record.size() > 13) -+ Annotations = getMDOrNull(Record[13]); -+ -+ MetadataList.assignValue( -+ GET_OR_DISTINCT( -+ DIGlobalVariable, -+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]), -+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], -+ getDITypeRefOrNull(Record[6]), Record[7], Record[8], -+ getMDOrNull(Record[9]), getMDOrNull(Record[10]), -+ static_cast(Record[11]), Record[12], -+ Annotations)), -+ NextMetadataNo); -+ -+ NextMetadataNo++; -+ } else if (Version == 2) { - Metadata *Annotations = nullptr; - if (Record.size() > 12) - Annotations = getMDOrNull(Record[12]); - - MetadataList.assignValue( -- GET_OR_DISTINCT(DIGlobalVariable, -- (Context, getMDOrNull(Record[1]), -- getMDString(Record[2]), getMDString(Record[3]), -- getMDOrNull(Record[4]), Record[5], -- getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[9]), getMDOrNull(Record[10]), -- Record[11], Annotations)), -+ GET_OR_DISTINCT( -+ DIGlobalVariable, -+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]), -+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], -+ getDITypeRefOrNull(Record[6]), Record[7], Record[8], -+ getMDOrNull(Record[9]), getMDOrNull(Record[10]), -+ DINode::FlagZero, Record[11], Annotations)), - NextMetadataNo); - - NextMetadataNo++; -@@ -2010,7 +2028,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[10]), nullptr, Record[11], nullptr)), -+ getMDOrNull(Record[10]), nullptr, DINode::FlagZero, Record[11], -+ nullptr)), - NextMetadataNo); - - NextMetadataNo++; -@@ -2043,7 +2062,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[10]), nullptr, AlignInBits, nullptr)); -+ getMDOrNull(Record[10]), nullptr, DINode::FlagZero, AlignInBits, -+ nullptr)); - - DIGlobalVariableExpression *DGVE = nullptr; - if (Attach || Expr) -diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -index 9416c7f5a03e..013e7ce2d425 100644 ---- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -@@ -2014,7 +2014,7 @@ void ModuleBitcodeWriter::writeDITemplateValueParameter( - void ModuleBitcodeWriter::writeDIGlobalVariable( - const DIGlobalVariable *N, SmallVectorImpl &Record, - unsigned Abbrev) { -- const uint64_t Version = 2 << 1; -+ const uint64_t Version = 3 << 1; - Record.push_back((uint64_t)N->isDistinct() | Version); - Record.push_back(VE.getMetadataOrNullID(N->getScope())); - Record.push_back(VE.getMetadataOrNullID(N->getRawName())); -@@ -2026,6 +2026,7 @@ void ModuleBitcodeWriter::writeDIGlobalVariable( - Record.push_back(N->isDefinition()); - Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); -+ Record.push_back(N->getFlags()); - Record.push_back(N->getAlignInBits()); - Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get())); - -diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -index 726aba18bb80..ee8be3921ab7 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -@@ -115,6 +115,29 @@ class DbgValueLoc { - SmallVector ValueLocEntries; - - bool IsVariadic; -+ /// Type of entry that this represents. -+ enum EntryType { -+ E_Location, -+ E_Integer, -+ E_ConstantFP, -+ E_ConstantInt, -+ E_TargetIndexLocation -+ }; -+ enum EntryType EntryKind; -+ -+ /// Either a constant, -+ union { -+ int64_t Int; -+ const ConstantFP *CFP; -+ const ConstantInt *CIP; -+ } Constant; -+ -+ union { -+ /// Or a location in the machine frame. -+ MachineLocation Loc; -+ /// Or a location from target specific location. -+ TargetIndexLocation TIL; -+ }; - - public: - DbgValueLoc(const DIExpression *Expr, ArrayRef Locs) -@@ -139,6 +162,37 @@ public: - assert(((Expr && Expr->isValid()) || !Loc.isLocation()) && - "DBG_VALUE with a machine location must have a valid expression."); - } -+ DbgValueLoc(const DIExpression *Expr, int64_t i) -+ : Expression(Expr), EntryKind(E_Integer) { -+ Constant.Int = i; -+ } -+ DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP) -+ : Expression(Expr), EntryKind(E_ConstantFP) { -+ Constant.CFP = CFP; -+ } -+ DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP) -+ : Expression(Expr), EntryKind(E_ConstantInt) { -+ Constant.CIP = CIP; -+ } -+ DbgValueLoc(const DIExpression *Expr, MachineLocation Loc) -+ : Expression(Expr), EntryKind(E_Location), Loc(Loc) { -+ assert(cast(Expr)->isValid()); -+ } -+ DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc) -+ : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {} -+ -+ bool isLocation() const { return EntryKind == E_Location; } -+ bool isTargetIndexLocation() const { -+ return EntryKind == E_TargetIndexLocation; -+ } -+ bool isInt() const { return EntryKind == E_Integer; } -+ bool isConstantFP() const { return EntryKind == E_ConstantFP; } -+ bool isConstantInt() const { return EntryKind == E_ConstantInt; } -+ int64_t getInt() const { return Constant.Int; } -+ const ConstantFP *getConstantFP() const { return Constant.CFP; } -+ const ConstantInt *getConstantInt() const { return Constant.CIP; } -+ MachineLocation getLoc() const { return Loc; } -+ TargetIndexLocation getTargetIndexLocation() const { return TIL; } - - bool isFragment() const { return getExpression()->isFragment(); } - bool isEntryVal() const { return getExpression()->isEntryValue(); } -diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp -index 8c6109880afc..fbfcf65a34ec 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp -+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp -@@ -38,6 +38,11 @@ void DebugLocStream::finalizeEntry() { - } - - DebugLocStream::ListBuilder::~ListBuilder() { -+#ifdef ENABLE_CLASSIC_FLANG -+ if (Finalized) -+ return; -+ Finalized = true; -+#endif - if (!Locs.finalizeList(Asm)) - return; - V.initializeDbgValue(&MI); -diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h -index a96bdd034918..0600f4f09d5e 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h -+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h -@@ -158,12 +158,18 @@ class DebugLocStream::ListBuilder { - DbgVariable &V; - const MachineInstr &MI; - size_t ListIndex; -+#ifdef ENABLE_CLASSIC_FLANG -+ bool Finalized; -+#endif - std::optional TagOffset; - - public: - ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, - DbgVariable &V, const MachineInstr &MI) - : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)), -+#ifdef ENABLE_CLASSIC_FLANG -+ Finalized(false), -+#endif - TagOffset(std::nullopt) {} - - void setTagOffset(uint8_t TO) { -diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -index 58ed21379d29..78ff0d351492 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -@@ -185,6 +185,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( - else - addGlobalName(GV->getName(), *VariableDIE, DeclContext); - -+ if (GV->isArtificial()) -+ addFlag(*VariableDIE, dwarf::DW_AT_artificial); -+ - addAnnotation(*VariableDIE, GV->getAnnotations()); - - if (uint32_t AlignInBytes = GV->getAlignInBytes()) -diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -index 1af4b643eb17..e526614792c7 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -@@ -284,6 +284,8 @@ struct SymbolCU { - DwarfCompileUnit *CU; - }; - -+class DummyDwarfExpression; -+ - /// The kind of accelerator tables we should emit. - enum class AccelTableKind { - Default, ///< Platform default. -@@ -437,6 +439,8 @@ private: - /// Map for tracking Fortran deferred CHARACTER lengths. - DenseMap StringTypeLocMap; - -+ DenseMap VariableInDependentType; -+ - AddressPool AddrPool; - - /// Accelerator tables. -diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp -index be4a3ed79d88..c47dd4664ea6 100644 ---- a/llvm/lib/IR/AsmWriter.cpp -+++ b/llvm/lib/IR/AsmWriter.cpp -@@ -2281,6 +2281,7 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N, - Printer.printBool("isDefinition", N->isDefinition()); - Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration()); - Printer.printMetadata("templateParams", N->getRawTemplateParams()); -+ Printer.printDIFlags("flags", N->getFlags()); - Printer.printInt("align", N->getAlignInBits()); - Printer.printMetadata("annotations", N->getRawAnnotations()); - Out << ")"; -diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp -index 1ce8c17f8a88..af6ebf702165 100644 ---- a/llvm/lib/IR/DIBuilder.cpp -+++ b/llvm/lib/IR/DIBuilder.cpp -@@ -725,14 +725,14 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression( - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, - unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, bool isDefined, - DIExpression *Expr, MDNode *Decl, MDTuple *TemplateParams, -- uint32_t AlignInBits, DINodeArray Annotations) { -+ DINode::DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations) { - checkGlobalVariableScope(Context); - - auto *GV = DIGlobalVariable::getDistinct( - VMContext, cast_or_null(Context), Name, LinkageName, F, - LineNumber, Ty, IsLocalToUnit, isDefined, -- cast_or_null(Decl), TemplateParams, AlignInBits, -- Annotations); -+ cast_or_null(Decl), TemplateParams, Flags, -+ AlignInBits, Annotations); - if (!Expr) - Expr = createExpression(); - auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr); -@@ -743,14 +743,14 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression( - DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, - unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, MDNode *Decl, -- MDTuple *TemplateParams, uint32_t AlignInBits) { -+ MDTuple *TemplateParams, DINode::DIFlags Flags, uint32_t AlignInBits) { - checkGlobalVariableScope(Context); - - return DIGlobalVariable::getTemporary( - VMContext, cast_or_null(Context), Name, LinkageName, F, - LineNumber, Ty, IsLocalToUnit, false, -- cast_or_null(Decl), TemplateParams, AlignInBits, -- nullptr) -+ cast_or_null(Decl), TemplateParams, Flags, -+ AlignInBits, nullptr) - .release(); - } - -diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp -index 48b5501c55ba..3696beccdd0c 100644 ---- a/llvm/lib/IR/DebugInfo.cpp -+++ b/llvm/lib/IR/DebugInfo.cpp -@@ -1547,12 +1547,13 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits) { -+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, LLVMDIFlags Flags, -+ uint32_t AlignInBits) { - return wrap(unwrap(Builder)->createGlobalVariableExpression( - unwrapDI(Scope), {Name, NameLen}, {Linkage, LinkLen}, - unwrapDI(File), LineNo, unwrapDI(Ty), LocalToUnit, - true, unwrap(Expr), unwrapDI(Decl), -- nullptr, AlignInBits)); -+ nullptr, map_from_llvmDIFlags(Flags), AlignInBits)); - } - - LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE) { -@@ -1597,11 +1598,12 @@ LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Decl, uint32_t AlignInBits) { -+ LLVMMetadataRef Decl, LLVMDIFlags Flags, uint32_t AlignInBits) { - return wrap(unwrap(Builder)->createTempGlobalVariableFwdDecl( - unwrapDI(Scope), {Name, NameLen}, {Linkage, LnkLen}, - unwrapDI(File), LineNo, unwrapDI(Ty), LocalToUnit, -- unwrapDI(Decl), nullptr, AlignInBits)); -+ unwrapDI(Decl), nullptr, map_from_llvmDIFlags(Flags), -+ AlignInBits)); - } - - LLVMValueRef -diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp -index 4933b6032688..d599896ee456 100644 ---- a/llvm/lib/IR/DebugInfoMetadata.cpp -+++ b/llvm/lib/IR/DebugInfoMetadata.cpp -@@ -1258,15 +1258,16 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - MDString *LinkageName, Metadata *File, unsigned Line, - Metadata *Type, bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, -- Metadata *TemplateParams, uint32_t AlignInBits, -- Metadata *Annotations, StorageType Storage, -- bool ShouldCreate) { -+ Metadata *TemplateParams, DIFlags Flags, -+ uint32_t AlignInBits, Metadata *Annotations, -+ StorageType Storage, bool ShouldCreate) { - assert(isCanonical(Name) && "Expected canonical MDString"); - assert(isCanonical(LinkageName) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP( - DIGlobalVariable, - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations)); -+ StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -+ Annotations)); - Metadata *Ops[] = {Scope, - Name, - File, -@@ -1277,7 +1278,8 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - TemplateParams, - Annotations}; - DEFINE_GETIMPL_STORE(DIGlobalVariable, -- (Line, IsLocalToUnit, IsDefinition, AlignInBits), Ops); -+ (Line, IsLocalToUnit, IsDefinition, Flags, AlignInBits), -+ Ops); - } - - DILocalVariable * -diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h -index 4cc3f8da6b75..8a621725f55e 100644 ---- a/llvm/lib/IR/LLVMContextImpl.h -+++ b/llvm/lib/IR/LLVMContextImpl.h -@@ -1055,6 +1055,7 @@ template <> struct MDNodeKeyImpl { - bool IsDefinition; - Metadata *StaticDataMemberDeclaration; - Metadata *TemplateParams; -+ unsigned Flags; - uint32_t AlignInBits; - Metadata *Annotations; - -@@ -1062,20 +1063,21 @@ template <> struct MDNodeKeyImpl { - Metadata *File, unsigned Line, Metadata *Type, - bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -+ unsigned Flags, - uint32_t AlignInBits, Metadata *Annotations) - : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), - Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), - IsDefinition(IsDefinition), - StaticDataMemberDeclaration(StaticDataMemberDeclaration), -- TemplateParams(TemplateParams), AlignInBits(AlignInBits), -- Annotations(Annotations) {} -+ TemplateParams(TemplateParams), Flags(Flags), -+ AlignInBits(AlignInBits), Annotations(Annotations) {} - MDNodeKeyImpl(const DIGlobalVariable *N) - : Scope(N->getRawScope()), Name(N->getRawName()), - LinkageName(N->getRawLinkageName()), File(N->getRawFile()), - Line(N->getLine()), Type(N->getRawType()), - IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()), -- TemplateParams(N->getRawTemplateParams()), -+ TemplateParams(N->getRawTemplateParams()), Flags(N->getFlags()), - AlignInBits(N->getAlignInBits()), Annotations(N->getRawAnnotations()) {} - - bool isKeyOf(const DIGlobalVariable *RHS) const { -@@ -1087,6 +1089,7 @@ template <> struct MDNodeKeyImpl { - StaticDataMemberDeclaration == - RHS->getRawStaticDataMemberDeclaration() && - TemplateParams == RHS->getRawTemplateParams() && -+ Flags == RHS->getFlags() && - AlignInBits == RHS->getAlignInBits() && - Annotations == RHS->getRawAnnotations(); - } -@@ -1101,7 +1104,7 @@ template <> struct MDNodeKeyImpl { - // TODO: make hashing work fine with such situations - return hash_combine(Scope, Name, LinkageName, File, Line, Type, - IsLocalToUnit, IsDefinition, /* AlignInBits, */ -- StaticDataMemberDeclaration, Annotations); -+ StaticDataMemberDeclaration, Flags, Annotations); - } - }; - -diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -index a7b1953ce81c..136132d7e65a 100644 ---- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -@@ -1052,8 +1052,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { - SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(), - /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"), - CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr, -- /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0, -- Annotations); -+ /*Decl=*/nullptr, /*TemplateParams=*/nullptr, -+ /*Flags=*/DINode::FlagZero, /*AlignInBits=*/0, Annotations); - CounterPtr->addDebugInfo(DICounter); - DB.finalize(); - } else { -diff --git a/llvm/test/Assembler/invalid-diglobalvariable-empty-name.ll b/llvm/test/Assembler/invalid-diglobalvariable-empty-name.ll -index a4e69f3c8b75..d3c476a03198 100644 ---- a/llvm/test/Assembler/invalid-diglobalvariable-empty-name.ll -+++ b/llvm/test/Assembler/invalid-diglobalvariable-empty-name.ll -@@ -1,4 +1,5 @@ - ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s -+; UNSUPPORTED: classic_flang - - ; CHECK: :[[@LINE+1]]:30: error: 'name' cannot be empty - !0 = !DIGlobalVariable(name: "") -diff --git a/llvm/test/DebugInfo/Generic/fortran-subprogram-at.ll b/llvm/test/DebugInfo/Generic/fortran-subprogram-at.ll -new file mode 100644 -index 000000000000..988c388fe218 ---- /dev/null -+++ b/llvm/test/DebugInfo/Generic/fortran-subprogram-at.ll -@@ -0,0 +1,24 @@ -+; Test for DIFlagPure, DIFlagElement and DIFlagRecursive. These three -+; DIFlags are used to attach DW_AT_pure, DW_AT_element, and DW_AT_recursive -+; attributes to DW_TAG_subprogram DIEs. -+ -+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s -+; CHECK: !DISubprogram({{.*}}, spFlags: DISPFlagDefinition | DISPFlagPure | DISPFlagElemental | DISPFlagRecursive, -+ -+!llvm.module.flags = !{!0, !1} -+!llvm.dbg.cu = !{!2} -+ -+define void @subprgm() !dbg !6 { -+L: -+ ret void -+} -+ -+!0 = !{i32 2, !"Dwarf Version", i32 2} -+!1 = !{i32 1, !"Debug Info Version", i32 3} -+!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: "Flang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) -+!3 = !DIFile(filename: "fortran-subprogram-at.f", directory: "/") -+!4 = !{} -+!5 = !DIBasicType(name: "real", size: 32, align: 32, encoding: DW_ATE_float) -+!6 = distinct !DISubprogram(name: "subprgm", scope: !2, file: !3, line: 256, type: !7, scopeLine: 256, spFlags: DISPFlagDefinition | DISPFlagPure | DISPFlagElemental | DISPFlagRecursive, unit: !2) -+!7 = !DISubroutineType(types: !8) -+!8 = !{null, !5} -diff --git a/llvm/test/DebugInfo/Generic/more-subprogram-attr.ll b/llvm/test/DebugInfo/Generic/more-subprogram-attr.ll -new file mode 100644 -index 000000000000..0533cf6b2367 ---- /dev/null -+++ b/llvm/test/DebugInfo/Generic/more-subprogram-attr.ll -@@ -0,0 +1,38 @@ -+; REQUIRES: object-emission -+ -+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t -+; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s -+ -+; Make sure we're emitting DW_AT_{pure,elemental,recursive}. -+; CHECK: DW_TAG_subprogram -+; CHECK-NOT: {{DW_TAG|NULL}} -+; CHECK: DW_AT_name {{.*}} "main" -+; CHECK-NOT: {{DW_TAG|NULL}} -+; CHECK: DW_AT_pure [DW_FORM_flag_present] (true) -+; CHECK: DW_AT_elemental [DW_FORM_flag_present] (true) -+; CHECK: DW_AT_recursive [DW_FORM_flag_present] (true) -+ -+define dso_local i32 @main() !dbg !7 { -+entry: -+ %retval = alloca i32, align 4 -+ store i32 0, i32* %retval, align 4 -+ ret i32 0, !dbg !12 -+} -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5} -+!llvm.ident = !{!6} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -+!1 = !DIFile(filename: "x.c", directory: "/tmp") -+!2 = !{} -+!3 = !{i32 2, !"Dwarf Version", i32 4} -+!4 = !{i32 2, !"Debug Info Version", i32 3} -+!5 = !{i32 1, !"wchar_size", i32 4} -+!6 = !{!"clang"} -+!7 = distinct !DISubprogram(name: "main", scope: !8, file: !8, line: 1, type: !9, scopeLine: 2, spFlags: DISPFlagDefinition | DISPFlagPure | DISPFlagElemental | DISPFlagRecursive, unit: !0, retainedNodes: !2) -+!8 = !DIFile(filename: "x.c", directory: "/tmp") -+!9 = !DISubroutineType(types: !10) -+!10 = !{!11} -+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+!12 = !DILocation(line: 3, column: 3, scope: !7) -diff --git a/llvm/test/DebugInfo/X86/DICommonBlock.ll b/llvm/test/DebugInfo/X86/DICommonBlock.ll -new file mode 100644 -index 000000000000..6cfb7a90640d ---- /dev/null -+++ b/llvm/test/DebugInfo/X86/DICommonBlock.ll -@@ -0,0 +1,36 @@ -+; ModuleID = 'none.f90' -+; RUN: llc %s -o %t -filetype=obj -+; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s -+; CHECK: DW_TAG_common_block -+ -+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-apple-macosx" -+ -+@common_a = common global [32 x i8] zeroinitializer, align 8, !dbg !13 -+ -+define i32 @subr() !dbg !9 { -+ %1 = getelementptr inbounds [32 x i8], [32 x i8]* @common_a, i64 0, i32 8 -+ %2 = bitcast i8* %1 to i32* -+ %3 = load i32, i32* %2 -+ ret i32 %3 -+} -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!6, !7} -+!llvm.ident = !{!8} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !1, producer: "PGI Fortran", isOptimized: false, runtimeVersion: 2, emissionKind: FullDebug, retainedTypes: !14, globals: !2) -+!1 = !DIFile(filename: "none.f90", directory: "/not/here/") -+!2 = !{!13} -+!3 = !{} -+!4 = !DIGlobalVariable(name: "common /a/", scope: !5, file: !1, line: 4, isLocal: false, isDefinition: true, type: !12) -+!5 = !DICommonBlock(scope: !9, declaration: !4, name: "a", file: !1, line: 4) -+!6 = !{i32 2, !"Dwarf Version", i32 4} -+!7 = !{i32 2, !"Debug Info Version", i32 3} -+!8 = !{!"PGI Fortran"} -+!9 = distinct !DISubprogram(name: "subrtn", scope: !0, file: !1, line: 1, type: !10, isLocal: false, isDefinition: true, unit: !0) -+!10 = !DISubroutineType(types: !11) -+!11 = !{!12, !12} -+!12 = !DIBasicType(name: "int", size: 32) -+!13 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression()) -+!14 = !{!12, !10} -diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py -index 4114bf7f54b2..9cc8520960c2 100644 ---- a/llvm/test/lit.cfg.py -+++ b/llvm/test/lit.cfg.py -@@ -645,3 +645,6 @@ if "aix" in config.target_triple: - # "OBJECT_MODE" to 'any' by default on AIX OS. - if "system-aix" in config.available_features: - config.environment["OBJECT_MODE"] = "any" -+ -+if config.use_classic_flang: -+ config.available_features.add("classic_flang") -diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in -index 57ee2100dfb7..fc7ab6536309 100644 ---- a/llvm/test/lit.site.cfg.py.in -+++ b/llvm/test/lit.site.cfg.py.in -@@ -61,6 +61,7 @@ config.expensive_checks = @LLVM_ENABLE_EXPENSIVE_CHECKS@ - config.reverse_iteration = @LLVM_ENABLE_REVERSE_ITERATION@ - config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@ - config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ -+config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ - - import lit.llvm - lit.llvm.initialize(lit_config, config) -diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c -index a3e41be12e95..906c96f1c24b 100644 ---- a/llvm/tools/llvm-c-test/debuginfo.c -+++ b/llvm/tools/llvm-c-test/debuginfo.c -@@ -64,7 +64,7 @@ int llvm_test_dibuilder(void) { - LLVMDIBuilderCreateConstantValueExpression(DIB, 0); - LLVMDIBuilderCreateGlobalVariableExpression( - DIB, Module, "globalClass", 11, "", 0, File, 1, ClassTy, true, -- GlobalClassValueExpr, NULL, 0); -+ GlobalClassValueExpr, NULL, LLVMDIFlagZero, 0); - - LLVMMetadataRef Int64Ty = - LLVMDIBuilderCreateBasicType(DIB, "Int64", 5, 64, 0, LLVMDIFlagZero); -@@ -75,7 +75,7 @@ int llvm_test_dibuilder(void) { - LLVMDIBuilderCreateConstantValueExpression(DIB, 0); - LLVMDIBuilderCreateGlobalVariableExpression( - DIB, Module, "global", 6, "", 0, File, 1, Int64TypeDef, true, -- GlobalVarValueExpr, NULL, 0); -+ GlobalVarValueExpr, NULL, LLVMDIFlagZero, 0); - - LLVMMetadataRef NameSpace = - LLVMDIBuilderCreateNameSpace(DIB, Module, "NameSpace", 9, false); -diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp -index 5342360109d0..4bce26851d2f 100644 ---- a/llvm/unittests/IR/MetadataTest.cpp -+++ b/llvm/unittests/IR/MetadataTest.cpp -@@ -2896,12 +2896,13 @@ TEST_F(DIGlobalVariableTest, get) { - DIDerivedType *StaticDataMemberDeclaration = - cast(getDerivedType()); - -+ DINode::DIFlags Flags = static_cast(7); - uint32_t AlignInBits = 8; - - auto *N = DIGlobalVariable::get( - Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, AlignInBits, -- nullptr); -+ IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ AlignInBits, nullptr); - - EXPECT_EQ(dwarf::DW_TAG_variable, N->getTag()); - EXPECT_EQ(Scope, N->getScope()); -@@ -2914,57 +2915,66 @@ TEST_F(DIGlobalVariableTest, get) { - EXPECT_EQ(IsDefinition, N->isDefinition()); - EXPECT_EQ(StaticDataMemberDeclaration, N->getStaticDataMemberDeclaration()); - EXPECT_EQ(templateParams, N->getTemplateParams()); -+ EXPECT_EQ(Flags, N->getFlags()); - EXPECT_EQ(AlignInBits, N->getAlignInBits()); - EXPECT_EQ(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, -+ nullptr)); - - EXPECT_NE(N, DIGlobalVariable::get( - Context, getSubprogram(), Name, LinkageName, File, Line, - Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, templateParams, AlignInBits, -- nullptr)); -+ StaticDataMemberDeclaration, templateParams, Flags, -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, "other", LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, -+ nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, "other", File, Line, - Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, - getFile(), Line, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line + 1, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, getDerivedType(), IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, !IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, !IsDefinition, - StaticDataMemberDeclaration, -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - cast(getDerivedType()), -- templateParams, AlignInBits, nullptr)); -+ templateParams, Flags, AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, nullptr, -+ Flags, AlignInBits, nullptr)); -+ EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, -+ Line, Type, IsLocalToUnit, IsDefinition, -+ StaticDataMemberDeclaration, -+ templateParams, -+ static_cast(Flags + 1), - AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, (AlignInBits << 1), -+ templateParams, Flags, (AlignInBits << 1), - nullptr)); - - TempDIGlobalVariable Temp = N->clone(); -@@ -2987,16 +2997,17 @@ TEST_F(DIGlobalVariableExpressionTest, get) { - auto *Expr2 = DIExpression::get(Context, {1, 2, 3}); - DIDerivedType *StaticDataMemberDeclaration = - cast(getDerivedType()); -+ DINode::DIFlags Flags = static_cast(7); - uint32_t AlignInBits = 8; - - auto *Var = DIGlobalVariable::get( - Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, AlignInBits, -- nullptr); -+ IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ AlignInBits, nullptr); - auto *Var2 = DIGlobalVariable::get( - Context, Scope, "other", LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, AlignInBits, -- nullptr); -+ IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ AlignInBits, nullptr); - auto *N = DIGlobalVariableExpression::get(Context, Var, Expr); - - EXPECT_EQ(Var, N->getVariable()); -diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py -index 76fd66502009..399b74c4f00d 100644 ---- a/llvm/utils/lit/lit/TestingConfig.py -+++ b/llvm/utils/lit/lit/TestingConfig.py -@@ -26,6 +26,7 @@ class TestingConfig(object): - "SYSTEMROOT", - "TERM", - "CLANG", -+ "FLANG", - "LLDB", - "LD_PRELOAD", - "LLVM_SYMBOLIZER_PATH", -diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py -index 16cc2968034b..d8dec6160071 100644 ---- a/llvm/utils/lit/lit/llvm/config.py -+++ b/llvm/utils/lit/lit/llvm/config.py -@@ -500,6 +500,8 @@ class LLVMConfig(object): - just-built or optionally an installed clang, and add a set of standard - substitutions useful to any test suite that makes use of clang. - -+ Also sets up use of flang -+ - """ - # Clear some environment variables that might affect Clang. - # -@@ -631,6 +633,14 @@ class LLVMConfig(object): - self.add_tool_substitutions(tool_substitutions) - self.config.substitutions.append(("%resource_dir", builtin_include_dir)) - -+ self.config.flang = self.use_llvm_tool( -+ 'flang', search_env='FLANG', required=required) -+ if self.config.flang: -+ tool_substitutions = [ -+ ToolSubst('%flang', command=self.config.flang) -+ ] -+ self.add_tool_substitutions(tool_substitutions) -+ - self.config.substitutions.append( - ( - "%itanium_abi_triple", --- -2.24.3 (Apple Git-128) - diff --git a/0018-Fix-declaration-definition-mismatch-for-classic-flang.patch b/0018-Fix-declaration-definition-mismatch-for-classic-flang.patch deleted file mode 100644 index e33514de851cf5c8518f90b7820672eaad482851..0000000000000000000000000000000000000000 --- a/0018-Fix-declaration-definition-mismatch-for-classic-flang.patch +++ /dev/null @@ -1,1041 +0,0 @@ -From d6d17d8f0362ac47100be32f0d9cb31fd66a1060 Mon Sep 17 00:00:00 2001 -From: luofeng14 -Date: Wed, 17 Apr 2024 14:26:07 +0800 -Subject: [PATCH] Fix declaration definition mismatch for classic flang - ---- - llvm/cmake/modules/TableGen.cmake | 4 + - llvm/include/llvm-c/DebugInfo.h | 11 ++- - .../include/llvm/Analysis/TargetLibraryInfo.h | 5 + - llvm/include/llvm/Analysis/VecFuncs.def | 8 ++ - llvm/include/llvm/IR/DebugInfoMetadata.h | 65 ++++++++++--- - llvm/lib/Analysis/TargetLibraryInfo.cpp | 24 +++++ - llvm/lib/AsmParser/LLParser.cpp | 6 +- - llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 44 ++++++--- - llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6 ++ - llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 4 + - .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 4 +- - llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 8 +- - llvm/lib/IR/AsmWriter.cpp | 2 + - llvm/lib/IR/DIBuilder.cpp | 21 +++- - llvm/lib/IR/DebugInfo.cpp | 22 ++++- - llvm/lib/IR/DebugInfoMetadata.cpp | 19 +++- - llvm/lib/IR/LLVMContextImpl.h | 22 ++++- - .../Instrumentation/InstrProfiling.cpp | 5 +- - llvm/tools/llvm-c-test/debuginfo.c | 12 ++- - llvm/unittests/IR/MetadataTest.cpp | 97 +++++++++++++++---- - llvm/utils/lit/lit/llvm/config.py | 17 ++-- - 21 files changed, 321 insertions(+), 85 deletions(-) - -diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake -index 7fd6628ef55d..d4d2c06c051b 100644 ---- a/llvm/cmake/modules/TableGen.cmake -+++ b/llvm/cmake/modules/TableGen.cmake -@@ -76,6 +76,10 @@ function(tablegen project ofn) - set(tblgen_change_flag "--write-if-changed") - endif() - -+ if (LLVM_ENABLE_CLASSIC_FLANG) -+ list(APPEND tblgen_change_flag "-DENABLE_CLASSIC_FLANG") -+ endif() -+ - if (NOT LLVM_ENABLE_WARNINGS) - list(APPEND LLVM_TABLEGEN_FLAGS "-no-warn-on-unused-template-args") - endif() -diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h -index 09d584c24711..0201bac4349d 100644 ---- a/llvm/include/llvm-c/DebugInfo.h -+++ b/llvm/include/llvm-c/DebugInfo.h -@@ -1148,7 +1148,10 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Expr, LLVMMetadataRef Decl, LLVMDIFlags Flags, -+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlags Flags, -+#endif - uint32_t AlignInBits); - - -@@ -1247,7 +1250,11 @@ LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Decl, LLVMDIFlags Flags, uint32_t AlignInBits); -+ LLVMMetadataRef Decl, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlags Flags, -+#endif -+ uint32_t AlignInBits); - - /** - * Insert a new llvm.dbg.declare intrinsic call before the given instruction. -diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h -index 490252cd018a..6805c6535189 100644 ---- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h -+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h -@@ -22,6 +22,9 @@ namespace llvm { - template class ArrayRef; - class Function; - class Module; -+#ifndef ENABLE_CLASSIC_FLANG -+class Triple; -+#endif - - /// Describes a possible vectorization of a function. - /// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized -@@ -80,7 +83,9 @@ class TargetLibraryInfoImpl { - bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, - const Module &M) const; - -+#ifdef ENABLE_CLASSIC_FLANG - Triple T; -+#endif - - public: - /// List of known vector-functions libraries. -diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def -index d1712d158423..679e28057d6e 100644 ---- a/llvm/include/llvm/Analysis/VecFuncs.def -+++ b/llvm/include/llvm/Analysis/VecFuncs.def -@@ -910,6 +910,8 @@ TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED) - TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED) - - #elif defined(TLI_DEFINE_PGMATH_AARCH64_VECFUNCS) -+ -+#ifdef ENABLE_CLASSIC_FLANG - // Classic flang libpgmath library's Vector Functions for AArch64 - - TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_2", FIXED(2)) -@@ -1079,8 +1081,11 @@ TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_4", FIXED(4)) - - TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_2", FIXED(2)) - TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4)) -+#endif - - #elif defined(TLI_DEFINE_PGMATH_X86_VECFUNCS) -+ -+#ifdef ENABLE_CLASSIC_FLANG - // Classic flang libpgmath library's Vector Functions for X86 - - TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_2", FIXED(2)) -@@ -1498,6 +1503,7 @@ TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_8", FIXED(8)) - TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4)) - TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_8", FIXED(8)) - TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_16", FIXED(16)) -+#endif - - #else - #error "Must choose which vector library functions are to be defined." -@@ -1519,5 +1525,7 @@ TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_16", FIXED(16)) - #undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS - #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES - #undef TLI_DEFINE_ARMPL_VECFUNCS -+#ifdef ENABLE_CLASSIC_FLANG - #undef TLI_DEFINE_PGMATH_AARCH64_VECFUNCS - #undef TLI_DEFINE_PGMATH_X86_VECFUNCS -+#endif -diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h -index 9bd86172a4c0..277c2ddd4dd9 100644 ---- a/llvm/include/llvm/IR/DebugInfoMetadata.h -+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h -@@ -3062,14 +3062,23 @@ class DIGlobalVariable : public DIVariable { - - bool IsLocalToUnit; - bool IsDefinition; -+#ifdef ENABLE_CLASSIC_FLANG - DIFlags Flags; -+#endif - - DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line, -- bool IsLocalToUnit, bool IsDefinition, DIFlags Flags, -+ bool IsLocalToUnit, bool IsDefinition, -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif - uint32_t AlignInBits, ArrayRef Ops) - : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops, AlignInBits), -- IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), -- Flags(Flags) {} -+#ifdef ENABLE_CLASSIC_FLANG -+ IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), Flags(Flags) {} -+#else -+ IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {} -+#endif -+ - ~DIGlobalVariable() = default; - - static DIGlobalVariable * -@@ -3077,28 +3086,40 @@ class DIGlobalVariable : public DIVariable { - StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type, - bool IsLocalToUnit, bool IsDefinition, - DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, -- DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations, -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif -+ uint32_t AlignInBits, DINodeArray Annotations, - StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Scope, getCanonicalMDString(Context, Name), - getCanonicalMDString(Context, LinkageName), File, Line, Type, - IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration, -- cast_or_null(TemplateParams), Flags, AlignInBits, -- Annotations.get(), Storage, ShouldCreate); -+ cast_or_null(TemplateParams), -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, Annotations.get(), Storage, ShouldCreate); - } - static DIGlobalVariable * - getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, - bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -- DIFlags Flags, uint32_t AlignInBits, Metadata *Annotations, -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif -+ uint32_t AlignInBits, Metadata *Annotations, - StorageType Storage, bool ShouldCreate = true); - - TempDIGlobalVariable cloneImpl() const { - return getTemporary(getContext(), getScope(), getName(), getLinkageName(), - getFile(), getLine(), getType(), isLocalToUnit(), - isDefinition(), getStaticDataMemberDeclaration(), -- getTemplateParams(), getFlags(), getAlignInBits(), -- getAnnotations()); -+ getTemplateParams(), -+#ifdef ENABLE_CLASSIC_FLANG -+ getFlags(), -+#endif -+ getAlignInBits(), getAnnotations()); - } - - public: -@@ -3107,26 +3128,40 @@ public: - (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File, - unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition, - DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, -- DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations), -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif -+ uint32_t AlignInBits, DINodeArray Annotations), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -- Annotations)) -+ StaticDataMemberDeclaration, TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, Annotations)) - DEFINE_MDNODE_GET( - DIGlobalVariable, - (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File, - unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -- DIFlags Flags, uint32_t AlignInBits, Metadata *Annotations), -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif -+ uint32_t AlignInBits, Metadata *Annotations), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -- Annotations)) -+ StaticDataMemberDeclaration, TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, Annotations)) - - TempDIGlobalVariable clone() const { return cloneImpl(); } - - bool isLocalToUnit() const { return IsLocalToUnit; } - bool isDefinition() const { return IsDefinition; } -+#ifdef ENABLE_CLASSIC_FLANG - DIFlags getFlags() const { return Flags; } - bool isArtificial() const { return getFlags() & FlagArtificial; } -+#endif - StringRef getDisplayName() const { return getStringOperand(4); } - StringRef getLinkageName() const { return getStringOperand(5); } - DIDerivedType *getStaticDataMemberDeclaration() const { -diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp -index a6593f6b3757..a9d69af5373c 100644 ---- a/llvm/lib/Analysis/TargetLibraryInfo.cpp -+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp -@@ -871,14 +871,26 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T); - } - -+#ifdef ENABLE_CLASSIC_FLANG - TargetLibraryInfoImpl::TargetLibraryInfoImpl() : T(Triple()) { -+#else -+TargetLibraryInfoImpl::TargetLibraryInfoImpl() { -+#endif - // Default to everything being available. - memset(AvailableArray, -1, sizeof(AvailableArray)); - -+#ifdef ENABLE_CLASSIC_FLANG - initialize(*this, T, StandardNames); -+#else -+ initialize(*this, Triple(), StandardNames); -+#endif - } - -+#ifdef ENABLE_CLASSIC_FLANG - TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) : T(T) { -+#else -+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { -+#endif - // Default to everything being available. - memset(AvailableArray, -1, sizeof(AvailableArray)); - -@@ -890,7 +902,11 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) - ShouldExtI32Return(TLI.ShouldExtI32Return), - ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), - ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), -+#ifdef ENABLE_CLASSIC_FLANG - SizeOfInt(TLI.SizeOfInt), T(TLI.T) { -+#else -+ SizeOfInt(TLI.SizeOfInt) { -+#endif - memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); - VectorDescs = TLI.VectorDescs; - ScalarDescs = TLI.ScalarDescs; -@@ -902,7 +918,11 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) - ShouldExtI32Return(TLI.ShouldExtI32Return), - ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), - ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), -+#ifdef ENABLE_CLASSIC_FLANG - SizeOfInt(TLI.SizeOfInt), T(TLI.T) { -+#else -+ SizeOfInt(TLI.SizeOfInt) { -+#endif - std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), - AvailableArray); - VectorDescs = TLI.VectorDescs; -@@ -916,7 +936,9 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoI - ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; - ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; - SizeOfInt = TLI.SizeOfInt; -+#ifdef ENABLE_CLASSIC_FLANG - T = TLI.T; -+#endif - memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); - return *this; - } -@@ -928,7 +950,9 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl && - ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; - ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; - SizeOfInt = TLI.SizeOfInt; -+#ifdef ENABLE_CLASSIC_FLANG - T = TLI.T; -+#endif - std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), - AvailableArray); - return *this; -diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp -index d7eb34e3d148..0a7166bd50b7 100644 ---- a/llvm/lib/AsmParser/LLParser.cpp -+++ b/llvm/lib/AsmParser/LLParser.cpp -@@ -5432,7 +5432,6 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { - OPTIONAL(isDefinition, MDBoolField, (true)); \ - OPTIONAL(templateParams, MDField, ); \ - OPTIONAL(declaration, MDField, ); \ -- OPTIONAL(flags, DIFlagField, ); \ - OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ - OPTIONAL(annotations, MDField, ); - #endif -@@ -5443,7 +5442,10 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { - GET_OR_DISTINCT(DIGlobalVariable, - (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, -- declaration.Val, templateParams.Val, flags.Val, -+ declaration.Val, templateParams.Val, -+#ifdef ENABLE_CLASSIC_FLANG -+ flags.Val, -+#endif - align.Val, annotations.Val)); - return false; - } -diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -index c21e5e5dba97..a33a0587d1c0 100644 ---- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp -@@ -1979,12 +1979,16 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - break; - } - case bitc::METADATA_GLOBAL_VAR: { -+#ifdef ENABLE_CLASSIC_FLANG - if (Record.size() < 11 || Record.size() > 14) -+#else -+ if (Record.size() < 11 || Record.size() > 13) -+#endif - return error("Invalid record"); - - IsDistinct = Record[0] & 1; - unsigned Version = Record[0] >> 1; -- -+#ifdef ENABLE_CLASSIC_FLANG - if (Version == 3) { - // Add support for DIFlags - Metadata *Annotations = nullptr; -@@ -1998,24 +2002,30 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getDITypeRefOrNull(Record[6]), Record[7], Record[8], - getMDOrNull(Record[9]), getMDOrNull(Record[10]), -- static_cast(Record[11]), Record[12], -- Annotations)), -+ static_cast(Record[11]), -+ Record[12], Annotations)), - NextMetadataNo); - - NextMetadataNo++; - } else if (Version == 2) { -+#else -+ if (Version == 2) { -+#endif - Metadata *Annotations = nullptr; - if (Record.size() > 12) - Annotations = getMDOrNull(Record[12]); - - MetadataList.assignValue( -- GET_OR_DISTINCT( -- DIGlobalVariable, -- (Context, getMDOrNull(Record[1]), getMDString(Record[2]), -- getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], -- getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[9]), getMDOrNull(Record[10]), -- DINode::FlagZero, Record[11], Annotations)), -+ GET_OR_DISTINCT(DIGlobalVariable, -+ (Context, getMDOrNull(Record[1]), -+ getMDString(Record[2]), getMDString(Record[3]), -+ getMDOrNull(Record[4]), Record[5], -+ getDITypeRefOrNull(Record[6]), Record[7], Record[8], -+ getMDOrNull(Record[9]), getMDOrNull(Record[10]), -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::FlagZero, -+#endif -+ Record[11], Annotations)), - NextMetadataNo); - - NextMetadataNo++; -@@ -2028,8 +2038,11 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[10]), nullptr, DINode::FlagZero, Record[11], -- nullptr)), -+ getMDOrNull(Record[10]), nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::FlagZero, -+#endif -+ Record[11], nullptr)), - NextMetadataNo); - - NextMetadataNo++; -@@ -2062,8 +2075,11 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getDITypeRefOrNull(Record[6]), Record[7], Record[8], -- getMDOrNull(Record[10]), nullptr, DINode::FlagZero, AlignInBits, -- nullptr)); -+ getMDOrNull(Record[10]), nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::FlagZero, -+#endif -+ AlignInBits, nullptr)); - - DIGlobalVariableExpression *DGVE = nullptr; - if (Attach || Expr) -diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -index 013e7ce2d425..d5bcd327a9b7 100644 ---- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp -@@ -2014,7 +2014,11 @@ void ModuleBitcodeWriter::writeDITemplateValueParameter( - void ModuleBitcodeWriter::writeDIGlobalVariable( - const DIGlobalVariable *N, SmallVectorImpl &Record, - unsigned Abbrev) { -+#ifdef ENABLE_CLASSIC_FLANG - const uint64_t Version = 3 << 1; -+#else -+ const uint64_t Version = 2 << 1; -+#endif - Record.push_back((uint64_t)N->isDistinct() | Version); - Record.push_back(VE.getMetadataOrNullID(N->getScope())); - Record.push_back(VE.getMetadataOrNullID(N->getRawName())); -@@ -2026,7 +2030,9 @@ void ModuleBitcodeWriter::writeDIGlobalVariable( - Record.push_back(N->isDefinition()); - Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); -+#ifdef ENABLE_CLASSIC_FLANG - Record.push_back(N->getFlags()); -+#endif - Record.push_back(N->getAlignInBits()); - Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get())); - -diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -index ee8be3921ab7..b4fe2295d0b1 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h -@@ -115,6 +115,7 @@ class DbgValueLoc { - SmallVector ValueLocEntries; - - bool IsVariadic; -+#ifdef ENABLE_CLASSIC_FLANG - /// Type of entry that this represents. - enum EntryType { - E_Location, -@@ -138,6 +139,7 @@ class DbgValueLoc { - /// Or a location from target specific location. - TargetIndexLocation TIL; - }; -+#endif - - public: - DbgValueLoc(const DIExpression *Expr, ArrayRef Locs) -@@ -162,6 +164,7 @@ public: - assert(((Expr && Expr->isValid()) || !Loc.isLocation()) && - "DBG_VALUE with a machine location must have a valid expression."); - } -+#ifdef ENABLE_CLASSIC_FLANG - DbgValueLoc(const DIExpression *Expr, int64_t i) - : Expression(Expr), EntryKind(E_Integer) { - Constant.Int = i; -@@ -193,6 +196,7 @@ public: - const ConstantInt *getConstantInt() const { return Constant.CIP; } - MachineLocation getLoc() const { return Loc; } - TargetIndexLocation getTargetIndexLocation() const { return TIL; } -+#endif - - bool isFragment() const { return getExpression()->isFragment(); } - bool isEntryVal() const { return getExpression()->isEntryValue(); } -diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -index 78ff0d351492..4a70d1f07d6e 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp -@@ -184,10 +184,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( - addFlag(*VariableDIE, dwarf::DW_AT_declaration); - else - addGlobalName(GV->getName(), *VariableDIE, DeclContext); -- -+#ifdef ENABLE_CLASSIC_FLANG - if (GV->isArtificial()) - addFlag(*VariableDIE, dwarf::DW_AT_artificial); -- -+#endif - addAnnotation(*VariableDIE, GV->getAnnotations()); - - if (uint32_t AlignInBytes = GV->getAlignInBytes()) -diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -index e526614792c7..1f7fe5c382e9 100644 ---- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h -@@ -283,9 +283,9 @@ struct SymbolCU { - const MCSymbol *Sym; - DwarfCompileUnit *CU; - }; -- -+#ifdef ENABLE_CLASSIC_FLANG - class DummyDwarfExpression; -- -+#endif - /// The kind of accelerator tables we should emit. - enum class AccelTableKind { - Default, ///< Platform default. -@@ -438,9 +438,9 @@ private: - - /// Map for tracking Fortran deferred CHARACTER lengths. - DenseMap StringTypeLocMap; -- -+#ifdef ENABLE_CLASSIC_FLANG - DenseMap VariableInDependentType; -- -+#endif - AddressPool AddrPool; - - /// Accelerator tables. -diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp -index c47dd4664ea6..df753b91ff90 100644 ---- a/llvm/lib/IR/AsmWriter.cpp -+++ b/llvm/lib/IR/AsmWriter.cpp -@@ -2281,7 +2281,9 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N, - Printer.printBool("isDefinition", N->isDefinition()); - Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration()); - Printer.printMetadata("templateParams", N->getRawTemplateParams()); -+#ifdef ENABLE_CLASSIC_FLANG - Printer.printDIFlags("flags", N->getFlags()); -+#endif - Printer.printInt("align", N->getAlignInBits()); - Printer.printMetadata("annotations", N->getRawAnnotations()); - Out << ")"; -diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp -index af6ebf702165..41b2acd8661f 100644 ---- a/llvm/lib/IR/DIBuilder.cpp -+++ b/llvm/lib/IR/DIBuilder.cpp -@@ -725,13 +725,19 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression( - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, - unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, bool isDefined, - DIExpression *Expr, MDNode *Decl, MDTuple *TemplateParams, -- DINode::DIFlags Flags, uint32_t AlignInBits, DINodeArray Annotations) { -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::DIFlags Flags, -+#endif -+ uint32_t AlignInBits, DINodeArray Annotations) { - checkGlobalVariableScope(Context); - - auto *GV = DIGlobalVariable::getDistinct( - VMContext, cast_or_null(Context), Name, LinkageName, F, - LineNumber, Ty, IsLocalToUnit, isDefined, -- cast_or_null(Decl), TemplateParams, Flags, -+ cast_or_null(Decl), TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, Annotations); - if (!Expr) - Expr = createExpression(); -@@ -743,13 +749,20 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression( - DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( - DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, - unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, MDNode *Decl, -- MDTuple *TemplateParams, DINode::DIFlags Flags, uint32_t AlignInBits) { -+ MDTuple *TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ DINode::DIFlags Flags, -+#endif -+ uint32_t AlignInBits) { - checkGlobalVariableScope(Context); - - return DIGlobalVariable::getTemporary( - VMContext, cast_or_null(Context), Name, LinkageName, F, - LineNumber, Ty, IsLocalToUnit, false, -- cast_or_null(Decl), TemplateParams, Flags, -+ cast_or_null(Decl), TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, nullptr) - .release(); - } -diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp -index 3696beccdd0c..f21a8f6e3c10 100644 ---- a/llvm/lib/IR/DebugInfo.cpp -+++ b/llvm/lib/IR/DebugInfo.cpp -@@ -1547,13 +1547,20 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Expr, LLVMMetadataRef Decl, LLVMDIFlags Flags, -+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlags Flags, -+#endif - uint32_t AlignInBits) { - return wrap(unwrap(Builder)->createGlobalVariableExpression( - unwrapDI(Scope), {Name, NameLen}, {Linkage, LinkLen}, - unwrapDI(File), LineNo, unwrapDI(Ty), LocalToUnit, - true, unwrap(Expr), unwrapDI(Decl), -- nullptr, map_from_llvmDIFlags(Flags), AlignInBits)); -+ nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ map_from_llvmDIFlags(Flags), -+#endif -+ AlignInBits)); - } - - LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE) { -@@ -1598,11 +1605,18 @@ LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl( - LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, - size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File, - unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit, -- LLVMMetadataRef Decl, LLVMDIFlags Flags, uint32_t AlignInBits) { -+ LLVMMetadataRef Decl, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlags Flags, -+#endif -+ uint32_t AlignInBits) { - return wrap(unwrap(Builder)->createTempGlobalVariableFwdDecl( - unwrapDI(Scope), {Name, NameLen}, {Linkage, LnkLen}, - unwrapDI(File), LineNo, unwrapDI(Ty), LocalToUnit, -- unwrapDI(Decl), nullptr, map_from_llvmDIFlags(Flags), -+ unwrapDI(Decl), nullptr, -+#ifdef ENABLE_CLASSIC_FLANG -+ map_from_llvmDIFlags(Flags), -+#endif - AlignInBits)); - } - -diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp -index d599896ee456..074529f6e1c6 100644 ---- a/llvm/lib/IR/DebugInfoMetadata.cpp -+++ b/llvm/lib/IR/DebugInfoMetadata.cpp -@@ -1258,7 +1258,10 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - MDString *LinkageName, Metadata *File, unsigned Line, - Metadata *Type, bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, -- Metadata *TemplateParams, DIFlags Flags, -+ Metadata *TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ DIFlags Flags, -+#endif - uint32_t AlignInBits, Metadata *Annotations, - StorageType Storage, bool ShouldCreate) { - assert(isCanonical(Name) && "Expected canonical MDString"); -@@ -1266,8 +1269,11 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - DEFINE_GETIMPL_LOOKUP( - DIGlobalVariable, - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, TemplateParams, Flags, AlignInBits, -- Annotations)); -+ StaticDataMemberDeclaration, TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, Annotations)); - Metadata *Ops[] = {Scope, - Name, - File, -@@ -1278,8 +1284,11 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, - TemplateParams, - Annotations}; - DEFINE_GETIMPL_STORE(DIGlobalVariable, -- (Line, IsLocalToUnit, IsDefinition, Flags, AlignInBits), -- Ops); -+ (Line, IsLocalToUnit, IsDefinition, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits), Ops); - } - - DILocalVariable * -diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h -index 8a621725f55e..01faec286b7d 100644 ---- a/llvm/lib/IR/LLVMContextImpl.h -+++ b/llvm/lib/IR/LLVMContextImpl.h -@@ -1055,7 +1055,9 @@ template <> struct MDNodeKeyImpl { - bool IsDefinition; - Metadata *StaticDataMemberDeclaration; - Metadata *TemplateParams; -+#ifdef ENABLE_CLASSIC_FLANG - unsigned Flags; -+#endif - uint32_t AlignInBits; - Metadata *Annotations; - -@@ -1063,13 +1065,18 @@ template <> struct MDNodeKeyImpl { - Metadata *File, unsigned Line, Metadata *Type, - bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, -+#ifdef ENABLE_CLASSIC_FLANG - unsigned Flags, -+#endif - uint32_t AlignInBits, Metadata *Annotations) - : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), - Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), - IsDefinition(IsDefinition), - StaticDataMemberDeclaration(StaticDataMemberDeclaration), -- TemplateParams(TemplateParams), Flags(Flags), -+ TemplateParams(TemplateParams), -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags(Flags), -+#endif - AlignInBits(AlignInBits), Annotations(Annotations) {} - MDNodeKeyImpl(const DIGlobalVariable *N) - : Scope(N->getRawScope()), Name(N->getRawName()), -@@ -1077,7 +1084,10 @@ template <> struct MDNodeKeyImpl { - Line(N->getLine()), Type(N->getRawType()), - IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()), -- TemplateParams(N->getRawTemplateParams()), Flags(N->getFlags()), -+ TemplateParams(N->getRawTemplateParams()), -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags(N->getFlags()), -+#endif - AlignInBits(N->getAlignInBits()), Annotations(N->getRawAnnotations()) {} - - bool isKeyOf(const DIGlobalVariable *RHS) const { -@@ -1089,7 +1099,9 @@ template <> struct MDNodeKeyImpl { - StaticDataMemberDeclaration == - RHS->getRawStaticDataMemberDeclaration() && - TemplateParams == RHS->getRawTemplateParams() && -+#ifdef ENABLE_CLASSIC_FLANG - Flags == RHS->getFlags() && -+#endif - AlignInBits == RHS->getAlignInBits() && - Annotations == RHS->getRawAnnotations(); - } -@@ -1104,7 +1116,11 @@ template <> struct MDNodeKeyImpl { - // TODO: make hashing work fine with such situations - return hash_combine(Scope, Name, LinkageName, File, Line, Type, - IsLocalToUnit, IsDefinition, /* AlignInBits, */ -- StaticDataMemberDeclaration, Flags, Annotations); -+ StaticDataMemberDeclaration, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ Annotations); - } - }; - -diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -index 136132d7e65a..343554241da3 100644 ---- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -@@ -1053,7 +1053,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) { - /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"), - CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr, - /*Decl=*/nullptr, /*TemplateParams=*/nullptr, -- /*Flags=*/DINode::FlagZero, /*AlignInBits=*/0, Annotations); -+#ifdef ENABLE_CLASSIC_FLANG -+ /*Flags=*/DINode::FlagZero, -+#endif -+ /*AlignInBits=*/0, Annotations); - CounterPtr->addDebugInfo(DICounter); - DB.finalize(); - } else { -diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c -index 906c96f1c24b..e1866443e762 100644 ---- a/llvm/tools/llvm-c-test/debuginfo.c -+++ b/llvm/tools/llvm-c-test/debuginfo.c -@@ -64,7 +64,11 @@ int llvm_test_dibuilder(void) { - LLVMDIBuilderCreateConstantValueExpression(DIB, 0); - LLVMDIBuilderCreateGlobalVariableExpression( - DIB, Module, "globalClass", 11, "", 0, File, 1, ClassTy, true, -- GlobalClassValueExpr, NULL, LLVMDIFlagZero, 0); -+ GlobalClassValueExpr, NULL, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlagZero, -+#endif -+ 0); - - LLVMMetadataRef Int64Ty = - LLVMDIBuilderCreateBasicType(DIB, "Int64", 5, 64, 0, LLVMDIFlagZero); -@@ -75,7 +79,11 @@ int llvm_test_dibuilder(void) { - LLVMDIBuilderCreateConstantValueExpression(DIB, 0); - LLVMDIBuilderCreateGlobalVariableExpression( - DIB, Module, "global", 6, "", 0, File, 1, Int64TypeDef, true, -- GlobalVarValueExpr, NULL, LLVMDIFlagZero, 0); -+ GlobalVarValueExpr, NULL, -+#ifdef ENABLE_CLASSIC_FLANG -+ LLVMDIFlagZero, -+#endif -+ 0); - - LLVMMetadataRef NameSpace = - LLVMDIBuilderCreateNameSpace(DIB, Module, "NameSpace", 9, false); -diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp -index 4bce26851d2f..788d514ad366 100644 ---- a/llvm/unittests/IR/MetadataTest.cpp -+++ b/llvm/unittests/IR/MetadataTest.cpp -@@ -2895,13 +2895,17 @@ TEST_F(DIGlobalVariableTest, get) { - MDTuple *templateParams = getTuple(); - DIDerivedType *StaticDataMemberDeclaration = - cast(getDerivedType()); -- -+#ifdef ENABLE_CLASSIC_FLANG - DINode::DIFlags Flags = static_cast(7); -+#endif - uint32_t AlignInBits = 8; - - auto *N = DIGlobalVariable::get( - Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ IsDefinition, StaticDataMemberDeclaration, templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, nullptr); - - EXPECT_EQ(dwarf::DW_TAG_variable, N->getTag()); -@@ -2915,67 +2919,114 @@ TEST_F(DIGlobalVariableTest, get) { - EXPECT_EQ(IsDefinition, N->isDefinition()); - EXPECT_EQ(StaticDataMemberDeclaration, N->getStaticDataMemberDeclaration()); - EXPECT_EQ(templateParams, N->getTemplateParams()); -+#ifdef ENABLE_CLASSIC_FLANG - EXPECT_EQ(Flags, N->getFlags()); -+#endif - EXPECT_EQ(AlignInBits, N->getAlignInBits()); - EXPECT_EQ(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, -- nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - - EXPECT_NE(N, DIGlobalVariable::get( - Context, getSubprogram(), Name, LinkageName, File, Line, - Type, IsLocalToUnit, IsDefinition, -- StaticDataMemberDeclaration, templateParams, Flags, -+ StaticDataMemberDeclaration, templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, "other", LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, -- nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, "other", File, Line, - Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, - getFile(), Line, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line + 1, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, getDerivedType(), IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, !IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, !IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - cast(getDerivedType()), -- templateParams, Flags, AlignInBits, nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, nullptr, -- Flags, AlignInBits, nullptr)); -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ AlignInBits, nullptr)); -+#ifdef ENABLE_CLASSIC_FLANG - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, - templateParams, - static_cast(Flags + 1), - AlignInBits, nullptr)); -+#endif - EXPECT_NE(N, DIGlobalVariable::get(Context, Scope, Name, LinkageName, File, - Line, Type, IsLocalToUnit, IsDefinition, - StaticDataMemberDeclaration, -- templateParams, Flags, (AlignInBits << 1), -- nullptr)); -+ templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif -+ (AlignInBits << 1), nullptr)); - - TempDIGlobalVariable Temp = N->clone(); - EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp))); -@@ -2997,16 +3048,24 @@ TEST_F(DIGlobalVariableExpressionTest, get) { - auto *Expr2 = DIExpression::get(Context, {1, 2, 3}); - DIDerivedType *StaticDataMemberDeclaration = - cast(getDerivedType()); -+#ifdef ENABLE_CLASSIC_FLANG - DINode::DIFlags Flags = static_cast(7); -+#endif - uint32_t AlignInBits = 8; - - auto *Var = DIGlobalVariable::get( - Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ IsDefinition, StaticDataMemberDeclaration, templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, nullptr); - auto *Var2 = DIGlobalVariable::get( - Context, Scope, "other", LinkageName, File, Line, Type, IsLocalToUnit, -- IsDefinition, StaticDataMemberDeclaration, templateParams, Flags, -+ IsDefinition, StaticDataMemberDeclaration, templateParams, -+#ifdef ENABLE_CLASSIC_FLANG -+ Flags, -+#endif - AlignInBits, nullptr); - auto *N = DIGlobalVariableExpression::get(Context, Var, Expr); - -diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py -index d8dec6160071..69ede49dadc5 100644 ---- a/llvm/utils/lit/lit/llvm/config.py -+++ b/llvm/utils/lit/lit/llvm/config.py -@@ -632,14 +632,15 @@ class LLVMConfig(object): - ] - self.add_tool_substitutions(tool_substitutions) - self.config.substitutions.append(("%resource_dir", builtin_include_dir)) -- -- self.config.flang = self.use_llvm_tool( -- 'flang', search_env='FLANG', required=required) -- if self.config.flang: -- tool_substitutions = [ -- ToolSubst('%flang', command=self.config.flang) -- ] -- self.add_tool_substitutions(tool_substitutions) -+ use_classic_flang = getattr(self.config, "use_classic_flang", None) -+ if use_classic_flang and use_classic_flang != "@LLVM_ENABLE_CLASSIC_FLANG@": -+ self.config.flang = self.use_llvm_tool( -+ 'flang', search_env='FLANG', required=required) -+ if self.config.flang: -+ tool_substitutions = [ -+ ToolSubst('%flang', command=self.config.flang) -+ ] -+ self.add_tool_substitutions(tool_substitutions) - - self.config.substitutions.append( - ( --- -2.24.3 (Apple Git-128) - diff --git a/0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch b/0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch deleted file mode 100644 index 98f265485213b610482fc6ea17e110ea0d54d977..0000000000000000000000000000000000000000 --- a/0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch +++ /dev/null @@ -1,12426 +0,0 @@ -From ad367d826e5959792ce7384be62ba1ccffbf0d9a Mon Sep 17 00:00:00 2001 -From: hev -Date: Wed, 11 Oct 2023 10:24:18 +0800 -Subject: [PATCH 1/7] [LoongArch] Improve codegen for atomic ops (#67391) - -This PR improves memory barriers generated by atomic operations. - -Memory barrier semantics of LL/SC: -``` -LL: + -SC: + -``` - -Changes: -* Remove unnecessary memory barriers before LL and between LL/SC. -* Fix acquire semantics. (If the SC instruction is not executed, then -the guarantee of acquiring semantics cannot be ensured. Therefore, an -acquire barrier needs to be generated when memory ordering includes an -acquire operation.) - -(cherry picked from commit 203ba238e33c570dba6cbcf247f1668bb2a13c26) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 50 +-- - .../Target/LoongArch/LoongArchInstrInfo.td | 24 +- - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 24 +- - .../ir-instruction/atomic-cmpxchg.ll | 376 ++++++++++++++++-- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 24 +- - .../ir-instruction/atomicrmw-minmax.ll | 24 -- - .../LoongArch/ir-instruction/atomicrmw.ll | 31 -- - llvm/unittests/Target/LoongArch/InstSizes.cpp | 2 +- - 8 files changed, 407 insertions(+), 148 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index 51df0463e235..eb78ef065b21 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, - Register ScratchReg = MI.getOperand(1).getReg(); - Register AddrReg = MI.getOperand(2).getReg(); - Register IncrReg = MI.getOperand(3).getReg(); -- AtomicOrdering Ordering = -- static_cast(MI.getOperand(4).getImm()); - - // .loop: -- // if(Ordering != AtomicOrdering::Monotonic) -- // dbar 0 - // ll.[w|d] dest, (addr) - // binop scratch, dest, val - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loop -- if (Ordering != AtomicOrdering::Monotonic) -- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopMBB, DL, - TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) - .addReg(AddrReg) -@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion( - Register AddrReg = MI.getOperand(2).getReg(); - Register IncrReg = MI.getOperand(3).getReg(); - Register MaskReg = MI.getOperand(4).getReg(); -- AtomicOrdering Ordering = -- static_cast(MI.getOperand(5).getImm()); - - // .loop: -- // if(Ordering != AtomicOrdering::Monotonic) -- // dbar 0 - // ll.w destreg, (alignedaddr) - // binop scratch, destreg, incr - // xor scratch, destreg, scratch -@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion( - // xor scratch, destreg, scratch - // sc.w scratch, scratch, (alignedaddr) - // beqz scratch, loop -- if (Ordering != AtomicOrdering::Monotonic) -- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) - .addReg(AddrReg) - .addImm(0); -@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); -- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); - - // Insert new MBBs. - MF->insert(++MBB.getIterator(), LoopHeadMBB); - MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); - MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); -- MF->insert(++LoopTailMBB->getIterator(), TailMBB); -- MF->insert(++TailMBB->getIterator(), DoneMBB); -+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB); - - // Set up successors and transfer remaining instructions to DoneMBB. - LoopHeadMBB->addSuccessor(LoopIfBodyMBB); - LoopHeadMBB->addSuccessor(LoopTailMBB); - LoopIfBodyMBB->addSuccessor(LoopTailMBB); - LoopTailMBB->addSuccessor(LoopHeadMBB); -- LoopTailMBB->addSuccessor(TailMBB); -- TailMBB->addSuccessor(DoneMBB); -+ LoopTailMBB->addSuccessor(DoneMBB); - DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); - DoneMBB->transferSuccessors(&MBB); - MBB.addSuccessor(LoopHeadMBB); -@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - - // - // .loophead: -- // dbar 0 - // ll.w destreg, (alignedaddr) - // and scratch2, destreg, mask - // move scratch1, destreg -- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) - .addReg(AddrReg) - .addImm(0); -@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - // .looptail: - // sc.w scratch1, scratch1, (addr) - // beqz scratch1, loop -- // dbar 0x700 - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) - .addReg(Scratch1Reg) - .addReg(AddrReg) -@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - .addReg(Scratch1Reg) - .addMBB(LoopHeadMBB); - -- // .tail: -- // dbar 0x700 -- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); -- - NextMBBI = MBB.end(); - MI.eraseFromParent(); - -@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( - computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); - computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); - computeAndAddLiveIns(LiveRegs, *LoopTailMBB); -- computeAndAddLiveIns(LiveRegs, *TailMBB); - computeAndAddLiveIns(LiveRegs, *DoneMBB); - - return true; -@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - .addReg(CmpValReg) - .addMBB(TailMBB); - // .looptail: -- // dbar 0 - // move scratch, newval - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loophead - // b done -- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) - .addReg(NewValReg) - .addReg(LoongArch::R0); -@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - .addMBB(TailMBB); - - // .looptail: -- // dbar 0 - // andn scratch, dest, mask - // or scratch, scratch, newval - // sc.[w|d] scratch, scratch, (addr) - // beqz scratch, loophead - // b done -- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) - .addReg(DestReg) - .addReg(MaskReg); -@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); - } - -+ AtomicOrdering Ordering = -+ static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); -+ int hint; -+ -+ switch (Ordering) { -+ case AtomicOrdering::Acquire: -+ case AtomicOrdering::AcquireRelease: -+ case AtomicOrdering::SequentiallyConsistent: -+ // TODO: acquire -+ hint = 0; -+ break; -+ default: -+ hint = 0x700; -+ } -+ - // .tail: -- // dbar 0x700 -- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); -+ // dbar 0x700 | acquire -+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); - - NextMBBI = MBB.end(); - MI.eraseFromParent(); -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 05ae36a9781d..a9b0db30c2f6 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; - - class PseudoCmpXchg - : Pseudo<(outs GPR:$res, GPR:$scratch), -- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { -+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1821,14 +1821,28 @@ def : AtomicPat; - --def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), -- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; -+// Ordering constants must be kept in sync with the AtomicOrdering enum in -+// AtomicOrdering.h. -+multiclass PseudoCmpXchgPat { -+ def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; -+ def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; -+ def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; -+ def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; -+ def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), -+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; -+} -+ -+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; -+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_loongarch_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), - (PseudoMaskedCmpXchg32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; --def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new), -- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>; - - def : PseudoMaskedAMMinMaxPat; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index f11af8fe6528..32106886c783 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: bne $a5, $a3, .LBB0_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB0_3 - ; LA64-NEXT: b .LBB0_6 - ; LA64-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bne $a5, $a3, .LBB1_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB1_3 - ; LA64-NEXT: b .LBB1_6 - ; LA64-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a1, $a3, .LBB2_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a6, $a5 - ; LA64-NEXT: sc.w $a6, $a0, 0 - ; LA64-NEXT: beqz $a6, .LBB2_3 - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: move $a3, $a1 -@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: bne $a2, $a3, .LBB3_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a5, $a4 - ; LA64-NEXT: sc.d $a5, $a0, 0 - ; LA64-NEXT: beqz $a5, .LBB3_3 - ; LA64-NEXT: b .LBB3_6 - ; LA64-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB3_1 -@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: bne $a6, $a3, .LBB4_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $t0, $a7 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB4_3 - ; LA64-NEXT: b .LBB4_6 - ; LA64-NEXT: .LBB4_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bne $a6, $a3, .LBB5_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $t0, $a7 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB5_3 - ; LA64-NEXT: b .LBB5_6 - ; LA64-NEXT: .LBB5_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a2, $a4, .LBB6_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a7, $a6 - ; LA64-NEXT: sc.w $a7, $a0, 0 - ; LA64-NEXT: beqz $a7, .LBB6_3 - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 -@@ -373,14 +366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: bne $a2, $a3, .LBB7_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a5, $a4 - ; LA64-NEXT: sc.d $a5, $a0, 0 - ; LA64-NEXT: beqz $a5, .LBB7_3 - ; LA64-NEXT: b .LBB7_6 - ; LA64-NEXT: .LBB7_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB7_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB7_1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 76e51fe7d3e8..1ac20d10e587 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: and $a5, $a4, $a0 - ; LA64-NEXT: bne $a5, $a1, .LBB0_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: b .LBB0_4 - ; LA64-NEXT: .LBB0_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB0_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - ; LA64-NEXT: and $a5, $a4, $a0 - ; LA64-NEXT: bne $a5, $a1, .LBB1_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: b .LBB1_4 - ; LA64-NEXT: .LBB1_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB1_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -77,13 +75,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB2_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB2_1 - ; LA64-NEXT: b .LBB2_4 - ; LA64-NEXT: .LBB2_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB2_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -97,13 +94,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB3_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB3_1 - ; LA64-NEXT: b .LBB3_4 - ; LA64-NEXT: .LBB3_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB3_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -130,14 +126,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: and $a6, $a5, $a4 - ; LA64-NEXT: bne $a6, $a1, .LBB4_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -167,14 +162,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: and $a6, $a5, $a4 - ; LA64-NEXT: bne $a6, $a1, .LBB5_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -190,13 +184,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB6_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -212,13 +205,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB7_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -247,14 +239,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: and $a6, $a5, $a2 - ; LA64-NEXT: bne $a6, $a1, .LBB8_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: b .LBB8_4 - ; LA64-NEXT: .LBB8_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -287,14 +278,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: and $a6, $a5, $a2 - ; LA64-NEXT: bne $a6, $a1, .LBB9_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: b .LBB9_4 - ; LA64-NEXT: .LBB9_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -313,13 +303,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB10_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB10_1 - ; LA64-NEXT: b .LBB10_4 - ; LA64-NEXT: .LBB10_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 -@@ -337,13 +326,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - ; LA64-NEXT: ll.d $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB11_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 - ; LA64-NEXT: beqz $a4, .LBB11_1 - ; LA64-NEXT: b .LBB11_4 - ; LA64-NEXT: .LBB11_3: --; LA64-NEXT: dbar 1792 -+; LA64-NEXT: dbar 0 - ; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -352,3 +340,343 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - %res = extractvalue { i64, i1 } %tmp, 1 - ret i1 %res - } -+ -+define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB12_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB12_1 -+; LA64-NEXT: b .LBB12_4 -+; LA64-NEXT: .LBB12_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB12_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB13_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB13_1 -+; LA64-NEXT: b .LBB13_4 -+; LA64-NEXT: .LBB13_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB13_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB14_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB14_1 -+; LA64-NEXT: b .LBB14_4 -+; LA64-NEXT: .LBB14_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB14_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ ret void -+} -+ -+define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB15_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB15_1 -+; LA64-NEXT: b .LBB15_4 -+; LA64-NEXT: .LBB15_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB15_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ ret void -+} -+ -+define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a4 -+; LA64-NEXT: bne $a6, $a1, .LBB16_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a4 -+; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB16_1 -+; LA64-NEXT: b .LBB16_4 -+; LA64-NEXT: .LBB16_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB16_4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ %res = extractvalue { i8, i1 } %tmp, 0 -+ ret i8 %res -+} -+ -+define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a4 -+; LA64-NEXT: bne $a6, $a1, .LBB17_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a4 -+; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB17_1 -+; LA64-NEXT: b .LBB17_4 -+; LA64-NEXT: .LBB17_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB17_4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ %res = extractvalue { i16, i1 } %tmp, 0 -+ ret i16 %res -+} -+ -+define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB18_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB18_1 -+; LA64-NEXT: b .LBB18_4 -+; LA64-NEXT: .LBB18_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB18_4: -+; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ %res = extractvalue { i32, i1 } %tmp, 0 -+ ret i32 %res -+} -+ -+define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB19_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB19_1 -+; LA64-NEXT: b .LBB19_4 -+; LA64-NEXT: .LBB19_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB19_4: -+; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ %res = extractvalue { i64, i1 } %tmp, 0 -+ ret i64 %res -+} -+ -+define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a0, $a2, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: addi.w $a2, $a4, 0 -+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a2 -+; LA64-NEXT: bne $a6, $a1, .LBB20_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a2 -+; LA64-NEXT: or $a6, $a6, $a0 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB20_1 -+; LA64-NEXT: b .LBB20_4 -+; LA64-NEXT: .LBB20_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB20_4: -+; LA64-NEXT: and $a0, $a5, $a4 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: xor $a0, $a1, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -+ %res = extractvalue { i8, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a0, $a2, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: addi.w $a2, $a4, 0 -+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a3, 0 -+; LA64-NEXT: and $a6, $a5, $a2 -+; LA64-NEXT: bne $a6, $a1, .LBB21_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: andn $a6, $a5, $a2 -+; LA64-NEXT: or $a6, $a6, $a0 -+; LA64-NEXT: sc.w $a6, $a3, 0 -+; LA64-NEXT: beqz $a6, .LBB21_1 -+; LA64-NEXT: b .LBB21_4 -+; LA64-NEXT: .LBB21_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB21_4: -+; LA64-NEXT: and $a0, $a5, $a4 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: xor $a0, $a1, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -+ %res = extractvalue { i16, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB22_1 -+; LA64-NEXT: b .LBB22_4 -+; LA64-NEXT: .LBB22_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB22_4: -+; LA64-NEXT: addi.w $a0, $a1, 0 -+; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -+ %res = extractvalue { i32, i1 } %tmp, 1 -+ ret i1 %res -+} -+ -+define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB23_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB23_1 -+; LA64-NEXT: b .LBB23_4 -+; LA64-NEXT: .LBB23_3: -+; LA64-NEXT: dbar 1792 -+; LA64-NEXT: .LBB23_4: -+; LA64-NEXT: xor $a0, $a3, $a1 -+; LA64-NEXT: sltui $a0, $a0, 1 -+; LA64-NEXT: ret -+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -+ %res = extractvalue { i64, i1 } %tmp, 1 -+ ret i1 %res -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 9767717395b6..9a29d67e9982 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB0_3 - ; LA64F-NEXT: b .LBB0_6 - ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB0_3 - ; LA64D-NEXT: b .LBB0_6 - ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB1_3 - ; LA64F-NEXT: b .LBB1_6 - ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB1_3 - ; LA64D-NEXT: b .LBB1_6 - ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB2_3 - ; LA64F-NEXT: b .LBB2_6 - ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB2_3 - ; LA64D-NEXT: b .LBB2_6 - ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 - ; LA64F-NEXT: # %bb.4: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: move $a4, $a1 - ; LA64F-NEXT: sc.w $a4, $a0, 0 - ; LA64F-NEXT: beqz $a4, .LBB3_3 - ; LA64F-NEXT: b .LBB3_6 - ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: dbar 0 - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 - ; LA64D-NEXT: # %bb.4: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 --; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: move $a4, $a1 - ; LA64D-NEXT: sc.w $a4, $a0, 0 - ; LA64D-NEXT: beqz $a4, .LBB3_3 - ; LA64D-NEXT: b .LBB3_6 - ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: dbar 0 - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -index cd4a9e7fa9c4..26ba77e8d4fd 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -@@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umax ptr %a, i8 %b acquire -@@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -65,8 +61,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umax ptr %a, i16 %b acquire -@@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB4_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umin ptr %a, i8 %b acquire -@@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a6, $a4, $a3 - ; LA64-NEXT: move $a5, $a4 -@@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a5, $a2, 0 - ; LA64-NEXT: beqz $a5, .LBB5_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw umin ptr %a, i16 %b acquire -@@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a4, $a0, 24 - ; LA64-NEXT: xori $a4, $a4, 56 - ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a3 - ; LA64-NEXT: move $a6, $a5 -@@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw max ptr %a, i8 %b acquire -@@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a4 - ; LA64-NEXT: move $a6, $a5 -@@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw max ptr %a, i16 %b acquire -@@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a4, $a0, 24 - ; LA64-NEXT: xori $a4, $a4, 56 - ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a3 - ; LA64-NEXT: move $a6, $a5 -@@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB12_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw min ptr %a, i8 %b acquire -@@ -335,7 +314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a5, $a2, 0 - ; LA64-NEXT: and $a7, $a5, $a4 - ; LA64-NEXT: move $a6, $a5 -@@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sc.w $a6, $a2, 0 - ; LA64-NEXT: beqz $a6, .LBB13_1 - ; LA64-NEXT: # %bb.4: --; LA64-NEXT: dbar 1792 --; LA64-NEXT: # %bb.5: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %1 = atomicrmw min ptr %a, i16 %b acquire -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -index c077d14f728f..626276ba05f7 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -@@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -108,7 +104,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xchg_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: move $a3, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_add_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: add.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 -@@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 -@@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_sub_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: sub.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; LA64-NEXT: nor $a5, $a5, $zero -@@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; LA64-NEXT: nor $a5, $a5, $zero -@@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_nand_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: nor $a3, $a3, $zero -@@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA64-LABEL: atomicrmw_nand_i32_acquire: - ; LA64: # %bb.0: - ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.w $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero -@@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { - ; LA64-LABEL: atomicrmw_nand_i64_acquire: - ; LA64: # %bb.0: - ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: dbar 0 - ; LA64-NEXT: ll.d $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero -@@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: addi.w $a3, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a3 - ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a0, 0 - ; LA32-NEXT: and $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 -@@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: addi.w $a2, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a2 - ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a4, $a2, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 -@@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_and_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_or_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: or $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -@@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -866,7 +837,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 - ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 -@@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xor_i32_acquire: - ; LA32: # %bb.0: - ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 --; LA32-NEXT: dbar 0 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: xor $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 -diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp -index 1a5d4369c48b..3180c7237a79 100644 ---- a/llvm/unittests/Target/LoongArch/InstSizes.cpp -+++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp -@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) { - " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" - " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" - " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" -- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" -+ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n" - " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", - // clang-format on - [](LoongArchInstrInfo &II, MachineFunction &MF) { --- -2.20.1 - - -From 5f2a6174965bccaeefdeb410cf67ea0cb378b26c Mon Sep 17 00:00:00 2001 -From: hev -Date: Wed, 11 Oct 2023 18:28:04 +0800 -Subject: [PATCH 2/7] [LoongArch] Add some atomic tests (#68766) - -(cherry picked from commit 37b93f07cd7ba2b1e6e81116cd49d34396b7b70a) ---- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 2714 ++++++++++++- - .../ir-instruction/atomicrmw-minmax.ll | 1400 +++++++ - .../LoongArch/ir-instruction/atomicrmw.ll | 3346 ++++++++++++++++- - .../ir-instruction/fence-singlethread.ll | 17 + - .../ir-instruction/load-store-atomic.ll | 196 + - 5 files changed, 7609 insertions(+), 64 deletions(-) - create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll - -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 9a29d67e9982..02d481cb3865 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -2,8 +2,6 @@ - ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F - ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D - --;; Add more test cases after supporting different AtomicOrdering. -- - define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-LABEL: float_fadd_acquire: - ; LA64F: # %bb.0: -@@ -681,3 +679,2715 @@ define double @double_fmax_acquire(ptr %p) nounwind { - %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 - ret double %v - } -+ -+define float @float_fadd_release(ptr %p) nounwind { -+; LA64F-LABEL: float_fadd_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB8_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB8_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB8_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB8_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB8_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB8_3 -+; LA64F-NEXT: b .LBB8_6 -+; LA64F-NEXT: .LBB8_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB8_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fadd_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB8_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB8_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB8_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB8_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB8_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB8_3 -+; LA64D-NEXT: b .LBB8_6 -+; LA64D-NEXT: .LBB8_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB8_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 -+ ret float %v -+} -+ -+define float @float_fsub_release(ptr %p) nounwind { -+; LA64F-LABEL: float_fsub_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) -+; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) -+; LA64F-NEXT: fld.s $fa1, $a1, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB9_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB9_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB9_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB9_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB9_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB9_3 -+; LA64F-NEXT: b .LBB9_6 -+; LA64F-NEXT: .LBB9_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB9_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fsub_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) -+; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) -+; LA64D-NEXT: fld.s $fa1, $a1, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB9_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB9_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB9_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB9_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB9_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB9_3 -+; LA64D-NEXT: b .LBB9_6 -+; LA64D-NEXT: .LBB9_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB9_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 -+ ret float %v -+} -+ -+define float @float_fmin_release(ptr %p) nounwind { -+; LA64F-LABEL: float_fmin_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB10_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB10_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB10_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB10_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB10_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB10_3 -+; LA64F-NEXT: b .LBB10_6 -+; LA64F-NEXT: .LBB10_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB10_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmin_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB10_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB10_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB10_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB10_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB10_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB10_3 -+; LA64D-NEXT: b .LBB10_6 -+; LA64D-NEXT: .LBB10_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB10_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 -+ ret float %v -+} -+ -+define float @float_fmax_release(ptr %p) nounwind { -+; LA64F-LABEL: float_fmax_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB11_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB11_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB11_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB11_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB11_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB11_3 -+; LA64F-NEXT: b .LBB11_6 -+; LA64F-NEXT: .LBB11_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB11_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmax_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB11_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB11_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB11_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB11_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB11_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB11_3 -+; LA64D-NEXT: b .LBB11_6 -+; LA64D-NEXT: .LBB11_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB11_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 -+ ret float %v -+} -+ -+define double @double_fadd_release(ptr %p) nounwind { -+; LA64F-LABEL: double_fadd_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 3 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB12_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB12_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fadd_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 3 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB12_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB12_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 -+ ret double %v -+} -+ -+define double @double_fsub_release(ptr %p) nounwind { -+; LA64F-LABEL: double_fsub_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, -1025 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 3 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB13_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB13_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fsub_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) -+; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI13_0) -+; LA64D-NEXT: fld.d $fs0, $a0, 0 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 3 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB13_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB13_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 -+ ret double %v -+} -+ -+define double @double_fmin_release(ptr %p) nounwind { -+; LA64F-LABEL: double_fmin_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 3 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB14_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmin) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB14_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmin_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 3 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB14_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB14_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 -+ ret double %v -+} -+ -+define double @double_fmax_release(ptr %p) nounwind { -+; LA64F-LABEL: double_fmax_release: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 3 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB15_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmax) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB15_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmax_release: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 3 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB15_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB15_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 -+ ret double %v -+} -+ -+define float @float_fadd_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: float_fadd_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB16_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB16_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB16_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB16_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB16_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB16_3 -+; LA64F-NEXT: b .LBB16_6 -+; LA64F-NEXT: .LBB16_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB16_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fadd_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB16_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB16_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB16_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB16_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB16_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB16_3 -+; LA64D-NEXT: b .LBB16_6 -+; LA64D-NEXT: .LBB16_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB16_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 -+ ret float %v -+} -+ -+define float @float_fsub_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: float_fsub_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; LA64F-NEXT: fld.s $fa1, $a1, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB17_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB17_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB17_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB17_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB17_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB17_3 -+; LA64F-NEXT: b .LBB17_6 -+; LA64F-NEXT: .LBB17_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB17_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fsub_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -+; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) -+; LA64D-NEXT: fld.s $fa1, $a1, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB17_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB17_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB17_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB17_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB17_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB17_3 -+; LA64D-NEXT: b .LBB17_6 -+; LA64D-NEXT: .LBB17_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB17_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 -+ ret float %v -+} -+ -+define float @float_fmin_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: float_fmin_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB18_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB18_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB18_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB18_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB18_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB18_3 -+; LA64F-NEXT: b .LBB18_6 -+; LA64F-NEXT: .LBB18_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB18_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmin_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB18_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB18_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB18_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB18_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB18_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB18_3 -+; LA64D-NEXT: b .LBB18_6 -+; LA64D-NEXT: .LBB18_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB18_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 -+ ret float %v -+} -+ -+define float @float_fmax_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: float_fmax_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB19_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB19_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB19_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB19_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB19_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB19_3 -+; LA64F-NEXT: b .LBB19_6 -+; LA64F-NEXT: .LBB19_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB19_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmax_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB19_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB19_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB19_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB19_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB19_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB19_3 -+; LA64D-NEXT: b .LBB19_6 -+; LA64D-NEXT: .LBB19_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB19_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 -+ ret float %v -+} -+ -+define double @double_fadd_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: double_fadd_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: ori $s4, $zero, 4 -+; LA64F-NEXT: ori $s5, $zero, 2 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB20_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s5 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB20_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fadd_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: ori $s3, $zero, 4 -+; LA64D-NEXT: ori $s4, $zero, 2 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB20_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s4 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB20_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 -+ ret double %v -+} -+ -+define double @double_fsub_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: double_fsub_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, -1025 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: ori $s4, $zero, 4 -+; LA64F-NEXT: ori $s5, $zero, 2 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB21_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s5 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB21_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fsub_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) -+; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) -+; LA64D-NEXT: fld.d $fs0, $a0, 0 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: ori $s3, $zero, 4 -+; LA64D-NEXT: ori $s4, $zero, 2 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB21_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s4 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB21_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 -+ ret double %v -+} -+ -+define double @double_fmin_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: double_fmin_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: ori $s4, $zero, 4 -+; LA64F-NEXT: ori $s5, $zero, 2 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB22_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmin) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s5 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB22_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmin_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: ori $s3, $zero, 4 -+; LA64D-NEXT: ori $s4, $zero, 2 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB22_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s4 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB22_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 -+ ret double %v -+} -+ -+define double @double_fmax_acq_rel(ptr %p) nounwind { -+; LA64F-LABEL: double_fmax_acq_rel: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: ori $s4, $zero, 4 -+; LA64F-NEXT: ori $s5, $zero, 2 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB23_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmax) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s5 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB23_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmax_acq_rel: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: ori $s3, $zero, 4 -+; LA64D-NEXT: ori $s4, $zero, 2 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB23_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s4 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB23_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 -+ ret double %v -+} -+ -+define float @float_fadd_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: float_fadd_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB24_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB24_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB24_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB24_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB24_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB24_3 -+; LA64F-NEXT: b .LBB24_6 -+; LA64F-NEXT: .LBB24_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB24_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fadd_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB24_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB24_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB24_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB24_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB24_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB24_3 -+; LA64D-NEXT: b .LBB24_6 -+; LA64D-NEXT: .LBB24_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB24_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 -+ ret float %v -+} -+ -+define float @float_fsub_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: float_fsub_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) -+; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) -+; LA64F-NEXT: fld.s $fa1, $a1, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB25_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB25_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB25_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB25_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB25_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB25_3 -+; LA64F-NEXT: b .LBB25_6 -+; LA64F-NEXT: .LBB25_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB25_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fsub_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) -+; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) -+; LA64D-NEXT: fld.s $fa1, $a1, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB25_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB25_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB25_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB25_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB25_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB25_3 -+; LA64D-NEXT: b .LBB25_6 -+; LA64D-NEXT: .LBB25_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB25_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 -+ ret float %v -+} -+ -+define float @float_fmin_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: float_fmin_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB26_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB26_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB26_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB26_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB26_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB26_3 -+; LA64F-NEXT: b .LBB26_6 -+; LA64F-NEXT: .LBB26_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB26_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmin_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB26_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB26_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB26_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB26_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB26_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB26_3 -+; LA64D-NEXT: b .LBB26_6 -+; LA64D-NEXT: .LBB26_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB26_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 -+ ret float %v -+} -+ -+define float @float_fmax_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: float_fmax_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB27_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB27_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB27_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB27_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB27_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB27_3 -+; LA64F-NEXT: b .LBB27_6 -+; LA64F-NEXT: .LBB27_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 -+; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: .LBB27_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmax_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB27_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB27_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB27_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB27_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB27_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB27_3 -+; LA64D-NEXT: b .LBB27_6 -+; LA64D-NEXT: .LBB27_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 -+; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: .LBB27_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 -+ ret float %v -+} -+ -+define double @double_fadd_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: double_fadd_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 5 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB28_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s4 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB28_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fadd_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 5 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB28_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s3 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB28_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 -+ ret double %v -+} -+ -+define double @double_fsub_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: double_fsub_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, -1025 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 5 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB29_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s4 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB29_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fsub_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0) -+; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI29_0) -+; LA64D-NEXT: fld.d $fs0, $a0, 0 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 5 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB29_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s3 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB29_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 -+ ret double %v -+} -+ -+define double @double_fmin_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: double_fmin_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 5 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB30_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmin) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s4 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB30_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmin_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 5 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB30_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s3 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB30_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 -+ ret double %v -+} -+ -+define double @double_fmax_seq_cst(ptr %p) nounwind { -+; LA64F-LABEL: double_fmax_seq_cst: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -80 -+; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 16 -+; LA64F-NEXT: addi.d $s3, $sp, 8 -+; LA64F-NEXT: ori $s4, $zero, 5 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB31_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 16 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmax) -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $s4 -+; LA64F-NEXT: move $a5, $s4 -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 16 -+; LA64F-NEXT: beqz $a1, .LBB31_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 80 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmax_seq_cst: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -80 -+; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 16 -+; LA64D-NEXT: addi.d $s2, $sp, 8 -+; LA64D-NEXT: ori $s3, $zero, 5 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB31_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 16 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $s3 -+; LA64D-NEXT: move $a5, $s3 -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 16 -+; LA64D-NEXT: beqz $a0, .LBB31_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 80 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 -+ ret double %v -+} -+ -+define float @float_fadd_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: float_fadd_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB32_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB32_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB32_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB32_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB32_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB32_3 -+; LA64F-NEXT: b .LBB32_6 -+; LA64F-NEXT: .LBB32_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB32_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fadd_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB32_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB32_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB32_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB32_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB32_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB32_3 -+; LA64D-NEXT: b .LBB32_6 -+; LA64D-NEXT: .LBB32_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB32_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 -+ ret float %v -+} -+ -+define float @float_fsub_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: float_fsub_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) -+; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) -+; LA64F-NEXT: fld.s $fa1, $a1, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB33_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB33_3 Depth 2 -+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB33_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB33_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB33_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB33_3 -+; LA64F-NEXT: b .LBB33_6 -+; LA64F-NEXT: .LBB33_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB33_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fsub_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) -+; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) -+; LA64D-NEXT: fld.s $fa1, $a1, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB33_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB33_3 Depth 2 -+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB33_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB33_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB33_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB33_3 -+; LA64D-NEXT: b .LBB33_6 -+; LA64D-NEXT: .LBB33_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB33_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 -+ ret float %v -+} -+ -+define float @float_fmin_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: float_fmin_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB34_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB34_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB34_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB34_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB34_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB34_3 -+; LA64F-NEXT: b .LBB34_6 -+; LA64F-NEXT: .LBB34_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB34_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmin_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB34_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB34_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB34_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB34_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB34_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB34_3 -+; LA64D-NEXT: b .LBB34_6 -+; LA64D-NEXT: .LBB34_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB34_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 -+ ret float %v -+} -+ -+define float @float_fmax_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: float_fmax_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: fld.s $fa0, $a0, 0 -+; LA64F-NEXT: addi.w $a1, $zero, 1 -+; LA64F-NEXT: movgr2fr.w $fa1, $a1 -+; LA64F-NEXT: ffint.s.w $fa1, $fa1 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB35_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Loop Header: Depth=1 -+; LA64F-NEXT: # Child Loop BB35_3 Depth 2 -+; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64F-NEXT: movfr2gr.s $a1, $fa2 -+; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: .LBB35_3: # %atomicrmw.start -+; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 -+; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64F-NEXT: ll.w $a3, $a0, 0 -+; LA64F-NEXT: bne $a3, $a2, .LBB35_5 -+; LA64F-NEXT: # %bb.4: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB35_3 Depth=2 -+; LA64F-NEXT: move $a4, $a1 -+; LA64F-NEXT: sc.w $a4, $a0, 0 -+; LA64F-NEXT: beqz $a4, .LBB35_3 -+; LA64F-NEXT: b .LBB35_6 -+; LA64F-NEXT: .LBB35_5: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 -+; LA64F-NEXT: dbar 1792 -+; LA64F-NEXT: .LBB35_6: # %atomicrmw.start -+; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 -+; LA64F-NEXT: movgr2fr.w $fa0, $a3 -+; LA64F-NEXT: addi.w $a1, $a2, 0 -+; LA64F-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: float_fmax_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: fld.s $fa0, $a0, 0 -+; LA64D-NEXT: addi.w $a1, $zero, 1 -+; LA64D-NEXT: movgr2fr.w $fa1, $a1 -+; LA64D-NEXT: ffint.s.w $fa1, $fa1 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB35_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Loop Header: Depth=1 -+; LA64D-NEXT: # Child Loop BB35_3 Depth 2 -+; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 -+; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 -+; LA64D-NEXT: movfr2gr.s $a1, $fa2 -+; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: .LBB35_3: # %atomicrmw.start -+; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 -+; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -+; LA64D-NEXT: ll.w $a3, $a0, 0 -+; LA64D-NEXT: bne $a3, $a2, .LBB35_5 -+; LA64D-NEXT: # %bb.4: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB35_3 Depth=2 -+; LA64D-NEXT: move $a4, $a1 -+; LA64D-NEXT: sc.w $a4, $a0, 0 -+; LA64D-NEXT: beqz $a4, .LBB35_3 -+; LA64D-NEXT: b .LBB35_6 -+; LA64D-NEXT: .LBB35_5: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 -+; LA64D-NEXT: dbar 1792 -+; LA64D-NEXT: .LBB35_6: # %atomicrmw.start -+; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 -+; LA64D-NEXT: movgr2fr.w $fa0, $a3 -+; LA64D-NEXT: addi.w $a1, $a2, 0 -+; LA64D-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 -+ ret float %v -+} -+ -+define double @double_fadd_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: double_fadd_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -64 -+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB36_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $zero -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB36_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 64 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fadd_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -64 -+; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB36_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $zero -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB36_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 64 -+; LA64D-NEXT: ret -+ %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 -+ ret double %v -+} -+ -+define double @double_fsub_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: double_fsub_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -64 -+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, -1025 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB37_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(__adddf3) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $zero -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB37_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 64 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fsub_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -64 -+; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0) -+; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI37_0) -+; LA64D-NEXT: fld.d $fs0, $a0, 0 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB37_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $zero -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB37_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 64 -+; LA64D-NEXT: ret -+ %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 -+ ret double %v -+} -+ -+define double @double_fmin_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: double_fmin_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -64 -+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB38_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmin) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $zero -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB38_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 64 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmin_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -64 -+; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB38_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $zero -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB38_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 64 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 -+ ret double %v -+} -+ -+define double @double_fmax_monotonic(ptr %p) nounwind { -+; LA64F-LABEL: double_fmax_monotonic: -+; LA64F: # %bb.0: -+; LA64F-NEXT: addi.d $sp, $sp, -64 -+; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill -+; LA64F-NEXT: move $fp, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 -+; LA64F-NEXT: lu52i.d $s0, $zero, 1023 -+; LA64F-NEXT: ori $s1, $zero, 8 -+; LA64F-NEXT: addi.d $s2, $sp, 8 -+; LA64F-NEXT: addi.d $s3, $sp, 0 -+; LA64F-NEXT: .p2align 4, , 16 -+; LA64F-NEXT: .LBB39_1: # %atomicrmw.start -+; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64F-NEXT: st.d $a0, $sp, 8 -+; LA64F-NEXT: move $a1, $s0 -+; LA64F-NEXT: bl %plt(fmax) -+; LA64F-NEXT: st.d $a0, $sp, 0 -+; LA64F-NEXT: move $a0, $s1 -+; LA64F-NEXT: move $a1, $fp -+; LA64F-NEXT: move $a2, $s2 -+; LA64F-NEXT: move $a3, $s3 -+; LA64F-NEXT: move $a4, $zero -+; LA64F-NEXT: move $a5, $zero -+; LA64F-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64F-NEXT: move $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $sp, 8 -+; LA64F-NEXT: beqz $a1, .LBB39_1 -+; LA64F-NEXT: # %bb.2: # %atomicrmw.end -+; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64F-NEXT: addi.d $sp, $sp, 64 -+; LA64F-NEXT: ret -+; -+; LA64D-LABEL: double_fmax_monotonic: -+; LA64D: # %bb.0: -+; LA64D-NEXT: addi.d $sp, $sp, -64 -+; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -+; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -+; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -+; LA64D-NEXT: move $fp, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 -+; LA64D-NEXT: addi.d $a0, $zero, 1 -+; LA64D-NEXT: movgr2fr.d $fa1, $a0 -+; LA64D-NEXT: ffint.d.l $fs0, $fa1 -+; LA64D-NEXT: ori $s0, $zero, 8 -+; LA64D-NEXT: addi.d $s1, $sp, 8 -+; LA64D-NEXT: addi.d $s2, $sp, 0 -+; LA64D-NEXT: .p2align 4, , 16 -+; LA64D-NEXT: .LBB39_1: # %atomicrmw.start -+; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -+; LA64D-NEXT: fst.d $fa0, $sp, 8 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -+; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 -+; LA64D-NEXT: fst.d $fa0, $sp, 0 -+; LA64D-NEXT: move $a0, $s0 -+; LA64D-NEXT: move $a1, $fp -+; LA64D-NEXT: move $a2, $s1 -+; LA64D-NEXT: move $a3, $s2 -+; LA64D-NEXT: move $a4, $zero -+; LA64D-NEXT: move $a5, $zero -+; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -+; LA64D-NEXT: fld.d $fa0, $sp, 8 -+; LA64D-NEXT: beqz $a0, .LBB39_1 -+; LA64D-NEXT: # %bb.2: # %atomicrmw.end -+; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -+; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -+; LA64D-NEXT: addi.d $sp, $sp, 64 -+; LA64D-NEXT: ret -+ %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 -+ ret double %v -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -index 26ba77e8d4fd..770358a05bfd 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll -@@ -353,3 +353,1403 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { - %1 = atomicrmw min ptr %a, i64 %b acquire - ret i64 %1 - } -+ -+define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB16_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB16_3: # in Loop: Header=BB16_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB16_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB17_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB17_3: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB17_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB20_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB20_3: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB20_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB21_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB21_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a7, $a1, .LBB24_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB24_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a7, $a1, .LBB25_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB25_3: # in Loop: Header=BB25_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB25_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a1, $a7, .LBB28_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB28_3: # in Loop: Header=BB28_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB28_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a1, $a7, .LBB29_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB29_3: # in Loop: Header=BB29_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB29_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB32_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB32_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB33_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB33_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB36_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB36_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB37_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB37_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a7, $a1, .LBB40_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB40_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a7, $a1, .LBB41_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB41_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a1, $a7, .LBB44_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB44_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a1, $a7, .LBB45_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB45_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB48_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB48_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB49_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB49_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB52_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB52_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB53_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB53_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a7, $a1, .LBB56_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB56_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a7, $a1, .LBB57_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB57_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a1, $a7, .LBB60_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB60_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a1, $a7, .LBB61_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB61_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i8_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB64_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB64_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i8 %b monotonic -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i16_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a6, $a1, .LBB65_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB65_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB65_3: # in Loop: Header=BB65_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB65_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i16 %b monotonic -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i32_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i32 %b monotonic -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umax_i64_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umax ptr %a, i64 %b monotonic -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i8_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB68_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB68_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB68_3: # in Loop: Header=BB68_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB68_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i8 %b monotonic -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i16_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a6, $a4, $a3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: bgeu $a1, $a6, .LBB69_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB69_1 Depth=1 -+; LA64-NEXT: xor $a5, $a4, $a1 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: .LBB69_3: # in Loop: Header=BB69_1 Depth=1 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB69_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i16 %b monotonic -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i32_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i32 %b monotonic -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_umin_i64_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.du $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw umin ptr %a, i64 %b monotonic -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i8_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a7, $a1, .LBB72_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB72_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB72_3: # in Loop: Header=BB72_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB72_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i8 %b monotonic -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i16_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a7, $a1, .LBB73_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB73_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB73_3: # in Loop: Header=BB73_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB73_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i16 %b monotonic -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i32_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i32 %b monotonic -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_max_i64_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammax_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw max ptr %a, i64 %b monotonic -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i8_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: ext.w.b $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: andi $a4, $a0, 24 -+; LA64-NEXT: xori $a4, $a4, 56 -+; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a4 -+; LA64-NEXT: sra.w $a7, $a7, $a4 -+; LA64-NEXT: bge $a1, $a7, .LBB76_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB76_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a3 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB76_3: # in Loop: Header=BB76_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB76_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i8 %b monotonic -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i16_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a3, $a0, 24 -+; LA64-NEXT: ori $a4, $zero, 48 -+; LA64-NEXT: sub.d $a3, $a4, $a3 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a4, $a4, $a0 -+; LA64-NEXT: addi.w $a4, $a4, 0 -+; LA64-NEXT: ext.w.h $a1, $a1 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a5, $a2, 0 -+; LA64-NEXT: and $a7, $a5, $a4 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sll.w $a7, $a7, $a3 -+; LA64-NEXT: sra.w $a7, $a7, $a3 -+; LA64-NEXT: bge $a1, $a7, .LBB77_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB77_1 Depth=1 -+; LA64-NEXT: xor $a6, $a5, $a1 -+; LA64-NEXT: and $a6, $a6, $a4 -+; LA64-NEXT: xor $a6, $a5, $a6 -+; LA64-NEXT: .LBB77_3: # in Loop: Header=BB77_1 Depth=1 -+; LA64-NEXT: sc.w $a6, $a2, 0 -+; LA64-NEXT: beqz $a6, .LBB77_1 -+; LA64-NEXT: # %bb.4: -+; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i16 %b monotonic -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i32_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i32 %b monotonic -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { -+; LA64-LABEL: atomicrmw_min_i64_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: ammin_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw min ptr %a, i64 %b monotonic -+ ret i64 %1 -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -index 626276ba05f7..94a26e4ed9c7 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll -@@ -900,6 +900,3228 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { - ret i64 %1 - } - -+define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB28_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB28_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB29_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB29_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 0 release -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB30_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB30_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 -1 release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB31_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB31_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB32_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB32_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 0 release -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB33_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB33_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 -1 release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: move $a3, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB34_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_exchange_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB36_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB36_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB37_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB37_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: add.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB38_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_add_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB40_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB40_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB41_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB41_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: sub.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB42_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.w $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.w $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.d $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.d $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB44_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB44_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB45_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB45_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: nor $a3, $a3, $zero -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB46_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.w $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB46_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.d $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB47_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: slli.w $a2, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a2 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a2 -+; LA32-NEXT: orn $a1, $a1, $a3 -+; LA32-NEXT: addi.w $a3, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a3 -+; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a0, 0 -+; LA32-NEXT: and $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB48_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: slli.d $a2, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a2 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a2 -+; LA64-NEXT: orn $a1, $a1, $a3 -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a3 -+; LA64-NEXT: amand_db.w $a3, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: slli.w $a3, $a0, 3 -+; LA32-NEXT: sll.w $a2, $a2, $a3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a3 -+; LA32-NEXT: orn $a1, $a1, $a2 -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a2 -+; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a4, $a2, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB49_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a2, $a3 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: slli.d $a3, $a0, 3 -+; LA64-NEXT: sll.w $a2, $a2, $a3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a3 -+; LA64-NEXT: orn $a1, $a1, $a2 -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a2 -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a2, $a3 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB50_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_and_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB52_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB53_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: or $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB54_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_or_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i8_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB56_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i8_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i8 %b release -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i16_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB57_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i16_release: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i16 %b release -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i32_release: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: xor $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB58_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i32_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i32 %b release -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i64_release: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 3 -+; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i64_release: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i64 %b release -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB60_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB60_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB61_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB61_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB62_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB62_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB63_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB63_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB64_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB64_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB65_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB65_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: move $a3, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB66_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_exchange_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB68_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB68_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB69_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB69_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: add.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB70_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_add_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB72_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB72_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB73_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB73_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: sub.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB74_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.w $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.w $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.d $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.d $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB76_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB76_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB77_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB77_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: nor $a3, $a3, $zero -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB78_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.w $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB78_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.d $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB79_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: slli.w $a2, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a2 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a2 -+; LA32-NEXT: orn $a1, $a1, $a3 -+; LA32-NEXT: addi.w $a3, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a3 -+; LA32-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a0, 0 -+; LA32-NEXT: and $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB80_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: slli.d $a2, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a2 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a2 -+; LA64-NEXT: orn $a1, $a1, $a3 -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a3 -+; LA64-NEXT: amand_db.w $a3, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: slli.w $a3, $a0, 3 -+; LA32-NEXT: sll.w $a2, $a2, $a3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a3 -+; LA32-NEXT: orn $a1, $a1, $a2 -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a2 -+; LA32-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a4, $a2, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB81_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a2, $a3 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: slli.d $a3, $a0, 3 -+; LA64-NEXT: sll.w $a2, $a2, $a3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a3 -+; LA64-NEXT: orn $a1, $a1, $a2 -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a2 -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a2, $a3 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB82_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_and_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB84_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB85_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: or $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB86_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_or_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i8_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB88_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i8_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i8 %b acq_rel -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i16_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB89_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i16_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i16 %b acq_rel -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i32_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: xor $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB90_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i32_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i32 %b acq_rel -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i64_acq_rel: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 4 -+; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i64_acq_rel: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i64 %b acq_rel -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB92_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB92_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB93_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB93_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst -+ ret i8 %1 -+} -+ -+define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a2, $zero, 255 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB94_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a2, $zero, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB94_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: addi.w $a5, $a1, 0 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB95_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: addi.w $a5, $a1, 0 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB95_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $zero, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB96_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $zero, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB96_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst -+ ret i16 %1 -+} -+ -+define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { -+; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a1, $zero, -4 -+; LA32-NEXT: and $a1, $a0, $a1 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: sll.w $a2, $a2, $a0 -+; LA32-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a1, 0 -+; LA32-NEXT: addi.w $a4, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: and $a4, $a4, $a2 -+; LA32-NEXT: xor $a4, $a3, $a4 -+; LA32-NEXT: sc.w $a4, $a1, 0 -+; LA32-NEXT: beqz $a4, .LBB97_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $zero, -4 -+; LA64-NEXT: and $a1, $a0, $a1 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a1, 0 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: and $a4, $a4, $a2 -+; LA64-NEXT: xor $a4, $a3, $a4 -+; LA64-NEXT: sc.w $a4, $a1, 0 -+; LA64-NEXT: beqz $a4, .LBB97_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: move $a3, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB98_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xchg_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_exchange_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xchg_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amswap_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB100_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB100_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: add.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB101_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: add.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB101_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: add.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB102_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_add_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_add_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_add_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amadd_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw add ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB104_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB104_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: sub.w $a5, $a4, $a1 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB105_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: sub.w $a5, $a4, $a1 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB105_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: sub.w $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB106_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.w $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.w $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_sub_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_sub_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: sub.d $a2, $zero, $a1 -+; LA64-NEXT: amadd_db.d $a1, $a2, $a0 -+; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: ret -+ %1 = atomicrmw sub ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB108_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB108_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: lu12i.w $a3, 15 -+; LA32-NEXT: ori $a3, $a3, 4095 -+; LA32-NEXT: sll.w $a3, $a3, $a0 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a4, $a2, 0 -+; LA32-NEXT: and $a5, $a4, $a1 -+; LA32-NEXT: nor $a5, $a5, $zero -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: and $a5, $a5, $a3 -+; LA32-NEXT: xor $a5, $a4, $a5 -+; LA32-NEXT: sc.w $a5, $a2, 0 -+; LA32-NEXT: beqz $a5, .LBB109_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a4, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: lu12i.w $a3, 15 -+; LA64-NEXT: ori $a3, $a3, 4095 -+; LA64-NEXT: sll.w $a3, $a3, $a0 -+; LA64-NEXT: addi.w $a3, $a3, 0 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a2, 0 -+; LA64-NEXT: and $a5, $a4, $a1 -+; LA64-NEXT: nor $a5, $a5, $zero -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: and $a5, $a5, $a3 -+; LA64-NEXT: xor $a5, $a4, $a5 -+; LA64-NEXT: sc.w $a5, $a2, 0 -+; LA64-NEXT: beqz $a5, .LBB109_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: srl.w $a0, $a4, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: nor $a3, $a3, $zero -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB110_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.w $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB110_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_nand_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_nand_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a2, $a0, 0 -+; LA64-NEXT: and $a3, $a2, $a1 -+; LA64-NEXT: nor $a3, $a3, $zero -+; LA64-NEXT: sc.d $a3, $a0, 0 -+; LA64-NEXT: beqz $a3, .LBB111_1 -+; LA64-NEXT: # %bb.2: -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw nand ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: slli.w $a2, $a0, 3 -+; LA32-NEXT: ori $a3, $zero, 255 -+; LA32-NEXT: sll.w $a3, $a3, $a2 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a2 -+; LA32-NEXT: orn $a1, $a1, $a3 -+; LA32-NEXT: addi.w $a3, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a3 -+; LA32-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a0, 0 -+; LA32-NEXT: and $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB112_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: slli.d $a2, $a0, 3 -+; LA64-NEXT: ori $a3, $zero, 255 -+; LA64-NEXT: sll.w $a3, $a3, $a2 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a2 -+; LA64-NEXT: orn $a1, $a1, $a3 -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a3 -+; LA64-NEXT: amand_db.w $a3, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: lu12i.w $a2, 15 -+; LA32-NEXT: ori $a2, $a2, 4095 -+; LA32-NEXT: slli.w $a3, $a0, 3 -+; LA32-NEXT: sll.w $a2, $a2, $a3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a3 -+; LA32-NEXT: orn $a1, $a1, $a2 -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a0, $a0, $a2 -+; LA32-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a4, $a2, $a1 -+; LA32-NEXT: sc.w $a4, $a0, 0 -+; LA32-NEXT: beqz $a4, .LBB113_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a2, $a3 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: lu12i.w $a2, 15 -+; LA64-NEXT: ori $a2, $a2, 4095 -+; LA64-NEXT: slli.d $a3, $a0, 3 -+; LA64-NEXT: sll.w $a2, $a2, $a3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a3 -+; LA64-NEXT: orn $a1, $a1, $a2 -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a0, $a0, $a2 -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: srl.w $a0, $a2, $a3 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: and $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB114_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_and_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_and_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_and_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amand_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw and ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB116_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: or $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB117_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: or $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB118_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_or_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_or_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_or_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw or ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ -+define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i8_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: andi $a1, $a1, 255 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB120_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i8_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i8 %b seq_cst -+ ret i8 %1 -+} -+ -+define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i16_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $a2, $zero, -4 -+; LA32-NEXT: and $a2, $a0, $a2 -+; LA32-NEXT: slli.w $a0, $a0, 3 -+; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -+; LA32-NEXT: sll.w $a1, $a1, $a0 -+; LA32-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a3, $a2, 0 -+; LA32-NEXT: xor $a4, $a3, $a1 -+; LA32-NEXT: sc.w $a4, $a2, 0 -+; LA32-NEXT: beqz $a4, .LBB121_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: srl.w $a0, $a3, $a0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i16_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a2, $zero, -4 -+; LA64-NEXT: and $a2, $a0, $a2 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -+; LA64-NEXT: srl.w $a0, $a3, $a0 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i16 %b seq_cst -+ ret i16 %1 -+} -+ -+define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i32_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: ll.w $a2, $a0, 0 -+; LA32-NEXT: xor $a3, $a2, $a1 -+; LA32-NEXT: sc.w $a3, $a0, 0 -+; LA32-NEXT: beqz $a3, .LBB122_1 -+; LA32-NEXT: # %bb.2: -+; LA32-NEXT: move $a0, $a2 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i32_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.w $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i32 %b seq_cst -+ ret i32 %1 -+} -+ -+define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { -+; LA32-LABEL: atomicrmw_xor_i64_seq_cst: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: ori $a3, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: atomicrmw_xor_i64_seq_cst: -+; LA64: # %bb.0: -+; LA64-NEXT: amxor_db.d $a2, $a1, $a0 -+; LA64-NEXT: move $a0, $a2 -+; LA64-NEXT: ret -+ %1 = atomicrmw xor ptr %a, i64 %b seq_cst -+ ret i64 %1 -+} -+ - define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-LABEL: atomicrmw_xchg_i8_monotonic: - ; LA32: # %bb.0: -@@ -910,14 +4132,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB28_1 -+; LA32-NEXT: beqz $a5, .LBB124_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -933,14 +4155,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB28_1 -+; LA64-NEXT: beqz $a5, .LBB124_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -959,14 +4181,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: addi.w $a5, $a1, 0 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB29_1 -+; LA32-NEXT: beqz $a5, .LBB125_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -983,14 +4205,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: addi.w $a5, $a1, 0 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB29_1 -+; LA64-NEXT: beqz $a5, .LBB125_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1001,11 +4223,11 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xchg_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: move $a3, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB30_1 -+; LA32-NEXT: beqz $a3, .LBB126_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -@@ -1049,14 +4271,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB32_1 -+; LA32-NEXT: beqz $a5, .LBB128_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1072,14 +4294,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB32_1 -+; LA64-NEXT: beqz $a5, .LBB128_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1098,14 +4320,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: add.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB33_1 -+; LA32-NEXT: beqz $a5, .LBB129_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1122,14 +4344,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: add.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB33_1 -+; LA64-NEXT: beqz $a5, .LBB129_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1140,11 +4362,11 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_add_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: add.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB34_1 -+; LA32-NEXT: beqz $a3, .LBB130_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -@@ -1188,14 +4410,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB36_1 -+; LA32-NEXT: beqz $a5, .LBB132_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1211,14 +4433,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB36_1 -+; LA64-NEXT: beqz $a5, .LBB132_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1237,14 +4459,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: sub.w $a5, $a4, $a1 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB37_1 -+; LA32-NEXT: beqz $a5, .LBB133_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1261,14 +4483,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: sub.w $a5, $a4, $a1 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB37_1 -+; LA64-NEXT: beqz $a5, .LBB133_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1279,11 +4501,11 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_sub_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: sub.w $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB38_1 -+; LA32-NEXT: beqz $a3, .LBB134_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -@@ -1329,7 +4551,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -1337,7 +4559,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB40_1 -+; LA32-NEXT: beqz $a5, .LBB136_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1353,7 +4575,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; LA64-NEXT: nor $a5, $a5, $zero -@@ -1361,7 +4583,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB40_1 -+; LA64-NEXT: beqz $a5, .LBB136_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1380,7 +4602,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: sll.w $a3, $a3, $a0 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a4, $a2, 0 - ; LA32-NEXT: and $a5, $a4, $a1 - ; LA32-NEXT: nor $a5, $a5, $zero -@@ -1388,7 +4610,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: and $a5, $a5, $a3 - ; LA32-NEXT: xor $a5, $a4, $a5 - ; LA32-NEXT: sc.w $a5, $a2, 0 --; LA32-NEXT: beqz $a5, .LBB41_1 -+; LA32-NEXT: beqz $a5, .LBB137_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a4, $a0 - ; LA32-NEXT: ret -@@ -1405,7 +4627,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a2, 0 - ; LA64-NEXT: and $a5, $a4, $a1 - ; LA64-NEXT: nor $a5, $a5, $zero -@@ -1413,7 +4635,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA64-NEXT: and $a5, $a5, $a3 - ; LA64-NEXT: xor $a5, $a4, $a5 - ; LA64-NEXT: sc.w $a5, $a2, 0 --; LA64-NEXT: beqz $a5, .LBB41_1 -+; LA64-NEXT: beqz $a5, .LBB137_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: srl.w $a0, $a4, $a0 - ; LA64-NEXT: ret -@@ -1424,24 +4646,24 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_nand_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: nor $a3, $a3, $zero - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB42_1 -+; LA32-NEXT: beqz $a3, .LBB138_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: atomicrmw_nand_i32_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero - ; LA64-NEXT: sc.w $a3, $a0, 0 --; LA64-NEXT: beqz $a3, .LBB42_1 -+; LA64-NEXT: beqz $a3, .LBB138_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret -@@ -1462,12 +4684,12 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { - ; - ; LA64-LABEL: atomicrmw_nand_i64_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB139_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a2, $a0, 0 - ; LA64-NEXT: and $a3, $a2, $a1 - ; LA64-NEXT: nor $a3, $a3, $zero - ; LA64-NEXT: sc.d $a3, $a0, 0 --; LA64-NEXT: beqz $a3, .LBB43_1 -+; LA64-NEXT: beqz $a3, .LBB139_1 - ; LA64-NEXT: # %bb.2: - ; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret -@@ -1486,11 +4708,11 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: orn $a1, $a1, $a3 - ; LA32-NEXT: addi.w $a3, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a3 --; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a3, $a0, 0 - ; LA32-NEXT: and $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 --; LA32-NEXT: beqz $a4, .LBB44_1 -+; LA32-NEXT: beqz $a4, .LBB140_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a3, $a2 - ; LA32-NEXT: ret -@@ -1524,11 +4746,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: orn $a1, $a1, $a2 - ; LA32-NEXT: addi.w $a2, $zero, -4 - ; LA32-NEXT: and $a0, $a0, $a2 --; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a4, $a2, $a1 - ; LA32-NEXT: sc.w $a4, $a0, 0 --; LA32-NEXT: beqz $a4, .LBB45_1 -+; LA32-NEXT: beqz $a4, .LBB141_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a2, $a3 - ; LA32-NEXT: ret -@@ -1554,11 +4776,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_and_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: and $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB46_1 -+; LA32-NEXT: beqz $a3, .LBB142_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -@@ -1600,11 +4822,11 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: slli.w $a0, $a0, 3 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 --; LA32-NEXT: beqz $a4, .LBB48_1 -+; LA32-NEXT: beqz $a4, .LBB144_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a3, $a0 - ; LA32-NEXT: ret -@@ -1631,11 +4853,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: slli.w $a0, $a0, 3 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: or $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 --; LA32-NEXT: beqz $a4, .LBB49_1 -+; LA32-NEXT: beqz $a4, .LBB145_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a3, $a0 - ; LA32-NEXT: ret -@@ -1657,11 +4879,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_or_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: or $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB50_1 -+; LA32-NEXT: beqz $a3, .LBB146_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -@@ -1703,11 +4925,11 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { - ; LA32-NEXT: slli.w $a0, $a0, 3 - ; LA32-NEXT: andi $a1, $a1, 255 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 --; LA32-NEXT: beqz $a4, .LBB52_1 -+; LA32-NEXT: beqz $a4, .LBB148_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a3, $a0 - ; LA32-NEXT: ret -@@ -1734,11 +4956,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { - ; LA32-NEXT: slli.w $a0, $a0, 3 - ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 - ; LA32-NEXT: sll.w $a1, $a1, $a0 --; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a3, $a2, 0 - ; LA32-NEXT: xor $a4, $a3, $a1 - ; LA32-NEXT: sc.w $a4, $a2, 0 --; LA32-NEXT: beqz $a4, .LBB53_1 -+; LA32-NEXT: beqz $a4, .LBB149_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: srl.w $a0, $a3, $a0 - ; LA32-NEXT: ret -@@ -1760,11 +4982,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { - define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { - ; LA32-LABEL: atomicrmw_xor_i32_monotonic: - ; LA32: # %bb.0: --; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -+; LA32-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 - ; LA32-NEXT: ll.w $a2, $a0, 0 - ; LA32-NEXT: xor $a3, $a2, $a1 - ; LA32-NEXT: sc.w $a3, $a0, 0 --; LA32-NEXT: beqz $a3, .LBB54_1 -+; LA32-NEXT: beqz $a3, .LBB150_1 - ; LA32-NEXT: # %bb.2: - ; LA32-NEXT: move $a0, $a2 - ; LA32-NEXT: ret -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -new file mode 100644 -index 000000000000..8d6056bc7677 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -@@ -0,0 +1,17 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 -+ -+define void @fence_singlethread() { -+; LA32-LABEL: fence_singlethread: -+; LA32: # %bb.0: -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: fence_singlethread: -+; LA64: # %bb.0: -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ fence syncscope("singlethread") seq_cst -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -index e91d0c145eab..deff11723d27 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -@@ -72,6 +72,202 @@ define i64 @load_acquire_i64(ptr %ptr) { - ret i64 %val - } - -+define i8 @load_unordered_i8(ptr %ptr) { -+; LA32-LABEL: load_unordered_i8: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.b $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_unordered_i8: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.b $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i8, ptr %ptr unordered, align 1 -+ ret i8 %val -+} -+ -+define i16 @load_unordered_i16(ptr %ptr) { -+; LA32-LABEL: load_unordered_i16: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.h $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_unordered_i16: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.h $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i16, ptr %ptr unordered, align 2 -+ ret i16 %val -+} -+ -+define i32 @load_unordered_i32(ptr %ptr) { -+; LA32-LABEL: load_unordered_i32: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_unordered_i32: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.w $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i32, ptr %ptr unordered, align 4 -+ ret i32 %val -+} -+ -+define i64 @load_unordered_i64(ptr %ptr) { -+; LA32-LABEL: load_unordered_i64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: move $a1, $zero -+; LA32-NEXT: bl %plt(__atomic_load_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_unordered_i64: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.d $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i64, ptr %ptr unordered, align 8 -+ ret i64 %val -+} -+ -+define i8 @load_monotonic_i8(ptr %ptr) { -+; LA32-LABEL: load_monotonic_i8: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.b $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_monotonic_i8: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.b $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i8, ptr %ptr monotonic, align 1 -+ ret i8 %val -+} -+ -+define i16 @load_monotonic_i16(ptr %ptr) { -+; LA32-LABEL: load_monotonic_i16: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.h $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_monotonic_i16: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.h $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i16, ptr %ptr monotonic, align 2 -+ ret i16 %val -+} -+ -+define i32 @load_monotonic_i32(ptr %ptr) { -+; LA32-LABEL: load_monotonic_i32: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_monotonic_i32: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.w $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i32, ptr %ptr monotonic, align 4 -+ ret i32 %val -+} -+ -+define i64 @load_monotonic_i64(ptr %ptr) { -+; LA32-LABEL: load_monotonic_i64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: move $a1, $zero -+; LA32-NEXT: bl %plt(__atomic_load_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_monotonic_i64: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.d $a0, $a0, 0 -+; LA64-NEXT: ret -+ %val = load atomic i64, ptr %ptr monotonic, align 8 -+ ret i64 %val -+} -+ -+define i8 @load_seq_cst_i8(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i8: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.b $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i8: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.b $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i8, ptr %ptr seq_cst, align 1 -+ ret i8 %val -+} -+ -+define i16 @load_seq_cst_i16(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i16: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.h $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i16: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.h $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i16, ptr %ptr seq_cst, align 2 -+ ret i16 %val -+} -+ -+define i32 @load_seq_cst_i32(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i32: -+; LA32: # %bb.0: -+; LA32-NEXT: ld.w $a0, $a0, 0 -+; LA32-NEXT: dbar 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i32: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.w $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i32, ptr %ptr seq_cst, align 4 -+ ret i32 %val -+} -+ -+define i64 @load_seq_cst_i64(ptr %ptr) { -+; LA32-LABEL: load_seq_cst_i64: -+; LA32: # %bb.0: -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: ori $a1, $zero, 5 -+; LA32-NEXT: bl %plt(__atomic_load_8) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: load_seq_cst_i64: -+; LA64: # %bb.0: -+; LA64-NEXT: ld.d $a0, $a0, 0 -+; LA64-NEXT: dbar 0 -+; LA64-NEXT: ret -+ %val = load atomic i64, ptr %ptr seq_cst, align 8 -+ ret i64 %val -+} -+ - define void @store_release_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_release_i8: - ; LA32: # %bb.0: --- -2.20.1 - - -From 0f189600f07f701d96940c2cc52ca762d2be9104 Mon Sep 17 00:00:00 2001 -From: WANG Xuerui -Date: Wed, 11 Oct 2023 10:39:13 +0800 -Subject: [PATCH 3/7] [LoongArch] Support finer-grained DBAR hints for LA664+ - (#68787) - -These are treated as DBAR 0 on older uarchs, so we can start to -unconditionally emit the new hints right away. - -Co-authored-by: WANG Rui -(cherry picked from commit 956482de13107b640cffedd08610fcccd98f708f) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- - .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++ - .../Target/LoongArch/LoongArchISelLowering.h | 1 + - .../Target/LoongArch/LoongArchInstrInfo.td | 24 +++++++- - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 16 ++--- - .../ir-instruction/atomic-cmpxchg.ll | 24 ++++---- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 48 +++++++-------- - .../ir-instruction/fence-singlethread.ll | 4 +- - .../CodeGen/LoongArch/ir-instruction/fence.ll | 16 ++--- - .../ir-instruction/load-store-atomic.ll | 58 +++++++++---------- - 10 files changed, 129 insertions(+), 86 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index eb78ef065b21..b348cb56c136 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -579,8 +579,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - case AtomicOrdering::Acquire: - case AtomicOrdering::AcquireRelease: - case AtomicOrdering::SequentiallyConsistent: -- // TODO: acquire -- hint = 0; -+ // acquire -+ hint = 0b10100; - break; - default: - hint = 0x700; -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 5affaf37ad5a..33a3197013cc 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, - // The MULO libcall is not part of libgcc, only compiler-rt. - setLibcallName(RTLIB::MULO_I128, nullptr); - -+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); -+ - static const ISD::CondCode FPCCToExpand[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, - ISD::SETGE, ISD::SETNE, ISD::SETGT}; -@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal( - SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { -+ case ISD::ATOMIC_FENCE: -+ return lowerATOMIC_FENCE(Op, DAG); - case ISD::EH_DWARF_CFA: - return lowerEH_DWARF_CFA(Op, DAG); - case ISD::GlobalAddress: -@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, - return SDValue(); - } - -+SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, -+ SelectionDAG &DAG) const { -+ SDLoc DL(Op); -+ SyncScope::ID FenceSSID = -+ static_cast(Op.getConstantOperandVal(2)); -+ -+ // singlethread fences only synchronize with signal handlers on the same -+ // thread and thus only need to preserve instruction order, not actually -+ // enforce memory ordering. -+ if (FenceSSID == SyncScope::SingleThread) -+ // MEMBARRIER is a compiler barrier; it codegens to a no-op. -+ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); -+ -+ return Op; -+} -+ - SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, - SelectionDAG &DAG) const { - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 6b5a851ec55d..23b90640a690 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -266,6 +266,7 @@ private: - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const override; -+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index a9b0db30c2f6..fcbd314507a5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1590,7 +1590,29 @@ def : RegRegStPat; - - /// Atomic loads and stores - --def : Pat<(atomic_fence timm, timm), (DBAR 0)>; -+// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from -+// the Linux patch revealing it [1]: -+// -+// - Bit 4: kind of constraint (0: completion, 1: ordering) -+// - Bit 3: barrier for previous read (0: true, 1: false) -+// - Bit 2: barrier for previous write (0: true, 1: false) -+// - Bit 1: barrier for succeeding read (0: true, 1: false) -+// - Bit 0: barrier for succeeding write (0: true, 1: false) -+// -+// Hint 0x700: barrier for "read after read" from the same address, which is -+// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as -+// nop if such reordering is disabled on supporting newer models.) -+// -+// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ -+// -+// Implementations without support for the finer-granularity hints simply treat -+// all as the full barrier (DBAR 0), so we can unconditionally start emiting the -+// more precise hints right away. -+ -+def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire -+def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release -+def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel -+def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst - - defm : LdPat; - defm : LdPat; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index 32106886c783..d8908acbc945 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -40,7 +40,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: b .LBB0_6 - ; LA64-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -93,7 +93,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: b .LBB1_6 - ; LA64-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64-NEXT: addi.w $a6, $a3, 0 -@@ -133,7 +133,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: move $a3, $a1 -@@ -171,7 +171,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: b .LBB3_6 - ; LA64-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB3_1 -@@ -226,7 +226,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: b .LBB4_6 - ; LA64-NEXT: .LBB4_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -284,7 +284,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: b .LBB5_6 - ; LA64-NEXT: .LBB5_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 - ; LA64-NEXT: addi.w $a7, $a3, 0 -@@ -329,7 +329,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 -@@ -372,7 +372,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { - ; LA64-NEXT: b .LBB7_6 - ; LA64-NEXT: .LBB7_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB7_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 - ; LA64-NEXT: bne $a2, $a3, .LBB7_1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 1ac20d10e587..4f25a1d69af1 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -27,7 +27,7 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: beqz $a5, .LBB0_1 - ; LA64-NEXT: b .LBB0_4 - ; LA64-NEXT: .LBB0_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -61,7 +61,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - ; LA64-NEXT: beqz $a5, .LBB1_1 - ; LA64-NEXT: b .LBB1_4 - ; LA64-NEXT: .LBB1_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -80,7 +80,7 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind - ; LA64-NEXT: beqz $a4, .LBB2_1 - ; LA64-NEXT: b .LBB2_4 - ; LA64-NEXT: .LBB2_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -99,7 +99,7 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ; LA64-NEXT: beqz $a4, .LBB3_1 - ; LA64-NEXT: b .LBB3_4 - ; LA64-NEXT: .LBB3_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB3_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -132,7 +132,7 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: beqz $a6, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -168,7 +168,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: beqz $a6, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret -@@ -189,7 +189,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -210,7 +210,7 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret -@@ -245,7 +245,7 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: beqz $a6, .LBB8_1 - ; LA64-NEXT: b .LBB8_4 - ; LA64-NEXT: .LBB8_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -284,7 +284,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: beqz $a6, .LBB9_1 - ; LA64-NEXT: b .LBB9_4 - ; LA64-NEXT: .LBB9_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 -@@ -308,7 +308,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: beqz $a4, .LBB10_1 - ; LA64-NEXT: b .LBB10_4 - ; LA64-NEXT: .LBB10_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 -@@ -331,7 +331,7 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw - ; LA64-NEXT: beqz $a4, .LBB11_1 - ; LA64-NEXT: b .LBB11_4 - ; LA64-NEXT: .LBB11_3: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 02d481cb3865..589360823b14 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -29,7 +29,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB0_6 - ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -64,7 +64,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB0_6 - ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -103,7 +103,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB1_6 - ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -138,7 +138,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB1_6 - ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -178,7 +178,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB2_6 - ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -214,7 +214,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB2_6 - ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -254,7 +254,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB3_6 - ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -290,7 +290,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB3_6 - ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1385,7 +1385,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB16_6 - ; LA64F-NEXT: .LBB16_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1420,7 +1420,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB16_6 - ; LA64D-NEXT: .LBB16_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1459,7 +1459,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB17_6 - ; LA64F-NEXT: .LBB17_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1494,7 +1494,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB17_6 - ; LA64D-NEXT: .LBB17_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1534,7 +1534,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB18_6 - ; LA64F-NEXT: .LBB18_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1570,7 +1570,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB18_6 - ; LA64D-NEXT: .LBB18_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -1610,7 +1610,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB19_6 - ; LA64F-NEXT: .LBB19_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -1646,7 +1646,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB19_6 - ; LA64D-NEXT: .LBB19_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2087,7 +2087,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB24_6 - ; LA64F-NEXT: .LBB24_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2122,7 +2122,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB24_6 - ; LA64D-NEXT: .LBB24_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2161,7 +2161,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB25_6 - ; LA64F-NEXT: .LBB25_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2196,7 +2196,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB25_6 - ; LA64D-NEXT: .LBB25_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2236,7 +2236,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB26_6 - ; LA64F-NEXT: .LBB26_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2272,7 +2272,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB26_6 - ; LA64D-NEXT: .LBB26_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -@@ -2312,7 +2312,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: b .LBB27_6 - ; LA64F-NEXT: .LBB27_5: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 --; LA64F-NEXT: dbar 0 -+; LA64F-NEXT: dbar 20 - ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -@@ -2348,7 +2348,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: b .LBB27_6 - ; LA64D-NEXT: .LBB27_5: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 --; LA64D-NEXT: dbar 0 -+; LA64D-NEXT: dbar 20 - ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -index 8d6056bc7677..a8b164a4cd3c 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll -@@ -5,12 +5,12 @@ - define void @fence_singlethread() { - ; LA32-LABEL: fence_singlethread: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: #MEMBARRIER - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_singlethread: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: #MEMBARRIER - ; LA64-NEXT: ret - fence syncscope("singlethread") seq_cst - ret void -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -index 724639f3c6fb..c5b2232f9b80 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll -@@ -5,12 +5,12 @@ - define void @fence_acquire() nounwind { - ; LA32-LABEL: fence_acquire: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_acquire: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - fence acquire - ret void -@@ -19,12 +19,12 @@ define void @fence_acquire() nounwind { - define void @fence_release() nounwind { - ; LA32-LABEL: fence_release: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_release: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: ret - fence release - ret void -@@ -33,12 +33,12 @@ define void @fence_release() nounwind { - define void @fence_acq_rel() nounwind { - ; LA32-LABEL: fence_acq_rel: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_acq_rel: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - fence acq_rel - ret void -@@ -47,12 +47,12 @@ define void @fence_acq_rel() nounwind { - define void @fence_seq_cst() nounwind { - ; LA32-LABEL: fence_seq_cst: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: fence_seq_cst: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - fence seq_cst - ret void -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -index deff11723d27..8b170c479eed 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll -@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) { - ; LA32-LABEL: load_acquire_i8: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.b $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i8: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.b $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i8, ptr %ptr acquire, align 1 - ret i8 %val -@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) { - ; LA32-LABEL: load_acquire_i16: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.h $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i16: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.h $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i16, ptr %ptr acquire, align 2 - ret i16 %val -@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) { - ; LA32-LABEL: load_acquire_i32: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 20 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_acquire_i32: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.w $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i32, ptr %ptr acquire, align 4 - ret i32 %val -@@ -66,7 +66,7 @@ define i64 @load_acquire_i64(ptr %ptr) { - ; LA64-LABEL: load_acquire_i64: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.d $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 20 - ; LA64-NEXT: ret - %val = load atomic i64, ptr %ptr acquire, align 8 - ret i64 %val -@@ -202,13 +202,13 @@ define i8 @load_seq_cst_i8(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i8: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.b $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_seq_cst_i8: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.b $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i8, ptr %ptr seq_cst, align 1 - ret i8 %val -@@ -218,13 +218,13 @@ define i16 @load_seq_cst_i16(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i16: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.h $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_seq_cst_i16: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.h $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i16, ptr %ptr seq_cst, align 2 - ret i16 %val -@@ -234,13 +234,13 @@ define i32 @load_seq_cst_i32(ptr %ptr) { - ; LA32-LABEL: load_seq_cst_i32: - ; LA32: # %bb.0: - ; LA32-NEXT: ld.w $a0, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: load_seq_cst_i32: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.w $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i32, ptr %ptr seq_cst, align 4 - ret i32 %val -@@ -262,7 +262,7 @@ define i64 @load_seq_cst_i64(ptr %ptr) { - ; LA64-LABEL: load_seq_cst_i64: - ; LA64: # %bb.0: - ; LA64-NEXT: ld.d $a0, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - %val = load atomic i64, ptr %ptr seq_cst, align 8 - ret i64 %val -@@ -271,13 +271,13 @@ define i64 @load_seq_cst_i64(ptr %ptr) { - define void @store_release_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_release_i8: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.b $a1, $a0, 0 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_release_i8: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: st.b $a1, $a0, 0 - ; LA64-NEXT: ret - store atomic i8 %v, ptr %ptr release, align 1 -@@ -287,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { - define void @store_release_i16(ptr %ptr, i16 signext %v) { - ; LA32-LABEL: store_release_i16: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.h $a1, $a0, 0 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_release_i16: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 18 - ; LA64-NEXT: st.h $a1, $a0, 0 - ; LA64-NEXT: ret - store atomic i16 %v, ptr %ptr release, align 2 -@@ -303,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { - define void @store_release_i32(ptr %ptr, i32 signext %v) { - ; LA32-LABEL: store_release_i32: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 18 - ; LA32-NEXT: st.w $a1, $a0, 0 - ; LA32-NEXT: ret - ; -@@ -465,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { - define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { - ; LA32-LABEL: store_seq_cst_i8: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.b $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_seq_cst_i8: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: st.b $a1, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - store atomic i8 %v, ptr %ptr seq_cst, align 1 - ret void -@@ -483,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { - define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { - ; LA32-LABEL: store_seq_cst_i16: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.h $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_seq_cst_i16: - ; LA64: # %bb.0: --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: st.h $a1, $a0, 0 --; LA64-NEXT: dbar 0 -+; LA64-NEXT: dbar 16 - ; LA64-NEXT: ret - store atomic i16 %v, ptr %ptr seq_cst, align 2 - ret void -@@ -501,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { - define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { - ; LA32-LABEL: store_seq_cst_i32: - ; LA32: # %bb.0: --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: st.w $a1, $a0, 0 --; LA32-NEXT: dbar 0 -+; LA32-NEXT: dbar 16 - ; LA32-NEXT: ret - ; - ; LA64-LABEL: store_seq_cst_i32: --- -2.20.1 - - -From 7e37560cddfa108426ff9f87871c71ed01e7596e Mon Sep 17 00:00:00 2001 -From: Weining Lu -Date: Tue, 17 Oct 2023 17:41:32 +0800 -Subject: [PATCH 4/7] [LoongArch] Precommit a test for atomic cmpxchg - optmization - -(cherry picked from commit b2773d170cb4bdb4b19ba801b5eb55395024b3ae) ---- - .../ir-instruction/atomic-cmpxchg.ll | 385 +++++++++++------- - 1 file changed, 245 insertions(+), 140 deletions(-) - -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 4f25a1d69af1..174bb9d0ff7d 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -106,6 +106,111 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind - ret void - } - -+define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { -+; LA64-LABEL: cmpxchg_i8_acquire_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: andi $a1, $a1, 255 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: andi $a2, $a2, 255 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: ori $a4, $zero, 255 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB4_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB4_1 -+; LA64-NEXT: b .LBB4_4 -+; LA64-NEXT: .LBB4_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB4_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic -+ ret void -+} -+ -+define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { -+; LA64-LABEL: cmpxchg_i16_acquire_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $zero, -4 -+; LA64-NEXT: and $a3, $a0, $a3 -+; LA64-NEXT: slli.d $a0, $a0, 3 -+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; LA64-NEXT: sll.w $a1, $a1, $a0 -+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -+; LA64-NEXT: sll.w $a2, $a2, $a0 -+; LA64-NEXT: lu12i.w $a4, 15 -+; LA64-NEXT: ori $a4, $a4, 4095 -+; LA64-NEXT: sll.w $a0, $a4, $a0 -+; LA64-NEXT: addi.w $a0, $a0, 0 -+; LA64-NEXT: addi.w $a2, $a2, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 -+; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a4, $a3, 0 -+; LA64-NEXT: and $a5, $a4, $a0 -+; LA64-NEXT: bne $a5, $a1, .LBB5_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -+; LA64-NEXT: andn $a5, $a4, $a0 -+; LA64-NEXT: or $a5, $a5, $a2 -+; LA64-NEXT: sc.w $a5, $a3, 0 -+; LA64-NEXT: beqz $a5, .LBB5_1 -+; LA64-NEXT: b .LBB5_4 -+; LA64-NEXT: .LBB5_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB5_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic -+ ret void -+} -+ -+define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { -+; LA64-LABEL: cmpxchg_i32_acquire_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.w $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB6_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.w $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB6_1 -+; LA64-NEXT: b .LBB6_4 -+; LA64-NEXT: .LBB6_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB6_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic -+ ret void -+} -+ -+define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { -+; LA64-LABEL: cmpxchg_i64_acquire_monotonic: -+; LA64: # %bb.0: -+; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: ll.d $a3, $a0, 0 -+; LA64-NEXT: bne $a3, $a1, .LBB7_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -+; LA64-NEXT: move $a4, $a2 -+; LA64-NEXT: sc.d $a4, $a0, 0 -+; LA64-NEXT: beqz $a4, .LBB7_1 -+; LA64-NEXT: b .LBB7_4 -+; LA64-NEXT: .LBB7_3: -+; LA64-NEXT: dbar 20 -+; LA64-NEXT: .LBB7_4: -+; LA64-NEXT: ret -+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic -+ ret void -+} -+ - define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: - ; LA64: # %bb.0: -@@ -121,19 +226,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB4_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB8_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB4_1 --; LA64-NEXT: b .LBB4_4 --; LA64-NEXT: .LBB4_3: -+; LA64-NEXT: beqz $a6, .LBB8_1 -+; LA64-NEXT: b .LBB8_4 -+; LA64-NEXT: .LBB8_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB4_4: -+; LA64-NEXT: .LBB8_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire -@@ -157,19 +262,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB5_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB9_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB5_1 --; LA64-NEXT: b .LBB5_4 --; LA64-NEXT: .LBB5_3: -+; LA64-NEXT: beqz $a6, .LBB9_1 -+; LA64-NEXT: b .LBB9_4 -+; LA64-NEXT: .LBB9_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB5_4: -+; LA64-NEXT: .LBB9_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire -@@ -180,17 +285,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB6_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB10_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB6_1 --; LA64-NEXT: b .LBB6_4 --; LA64-NEXT: .LBB6_3: -+; LA64-NEXT: beqz $a4, .LBB10_1 -+; LA64-NEXT: b .LBB10_4 -+; LA64-NEXT: .LBB10_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB6_4: -+; LA64-NEXT: .LBB10_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -201,17 +306,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB7_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB11_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB7_1 --; LA64-NEXT: b .LBB7_4 --; LA64-NEXT: .LBB7_3: -+; LA64-NEXT: beqz $a4, .LBB11_1 -+; LA64-NEXT: b .LBB11_4 -+; LA64-NEXT: .LBB11_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB7_4: -+; LA64-NEXT: .LBB11_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire -@@ -234,19 +339,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB8_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB12_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB8_1 --; LA64-NEXT: b .LBB8_4 --; LA64-NEXT: .LBB8_3: -+; LA64-NEXT: beqz $a6, .LBB12_1 -+; LA64-NEXT: b .LBB12_4 -+; LA64-NEXT: .LBB12_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB8_4: -+; LA64-NEXT: .LBB12_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -273,19 +378,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB9_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB13_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB9_1 --; LA64-NEXT: b .LBB9_4 --; LA64-NEXT: .LBB9_3: -+; LA64-NEXT: beqz $a6, .LBB13_1 -+; LA64-NEXT: b .LBB13_4 -+; LA64-NEXT: .LBB13_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB9_4: -+; LA64-NEXT: .LBB13_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -299,17 +404,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB10_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB14_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB10_1 --; LA64-NEXT: b .LBB10_4 --; LA64-NEXT: .LBB10_3: -+; LA64-NEXT: beqz $a4, .LBB14_1 -+; LA64-NEXT: b .LBB14_4 -+; LA64-NEXT: .LBB14_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB10_4: -+; LA64-NEXT: .LBB14_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -322,17 +427,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB11_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB15_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB11_1 --; LA64-NEXT: b .LBB11_4 --; LA64-NEXT: .LBB11_3: -+; LA64-NEXT: beqz $a4, .LBB15_1 -+; LA64-NEXT: b .LBB15_4 -+; LA64-NEXT: .LBB15_3: - ; LA64-NEXT: dbar 20 --; LA64-NEXT: .LBB11_4: -+; LA64-NEXT: .LBB15_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret -@@ -356,19 +461,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a2, $a2, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a3, 0 - ; LA64-NEXT: and $a5, $a4, $a0 --; LA64-NEXT: bne $a5, $a1, .LBB12_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -+; LA64-NEXT: bne $a5, $a1, .LBB16_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 --; LA64-NEXT: beqz $a5, .LBB12_1 --; LA64-NEXT: b .LBB12_4 --; LA64-NEXT: .LBB12_3: -+; LA64-NEXT: beqz $a5, .LBB16_1 -+; LA64-NEXT: b .LBB16_4 -+; LA64-NEXT: .LBB16_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB12_4: -+; LA64-NEXT: .LBB16_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic - ret void -@@ -390,19 +495,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a2, $a2, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a4, $a3, 0 - ; LA64-NEXT: and $a5, $a4, $a0 --; LA64-NEXT: bne $a5, $a1, .LBB13_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -+; LA64-NEXT: bne $a5, $a1, .LBB17_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 - ; LA64-NEXT: andn $a5, $a4, $a0 - ; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: sc.w $a5, $a3, 0 --; LA64-NEXT: beqz $a5, .LBB13_1 --; LA64-NEXT: b .LBB13_4 --; LA64-NEXT: .LBB13_3: -+; LA64-NEXT: beqz $a5, .LBB17_1 -+; LA64-NEXT: b .LBB17_4 -+; LA64-NEXT: .LBB17_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB13_4: -+; LA64-NEXT: .LBB17_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic - ret void -@@ -411,17 +516,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB14_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB18_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB14_1 --; LA64-NEXT: b .LBB14_4 --; LA64-NEXT: .LBB14_3: -+; LA64-NEXT: beqz $a4, .LBB18_1 -+; LA64-NEXT: b .LBB18_4 -+; LA64-NEXT: .LBB18_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB14_4: -+; LA64-NEXT: .LBB18_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic - ret void -@@ -430,17 +535,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw - define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB15_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB19_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB15_1 --; LA64-NEXT: b .LBB15_4 --; LA64-NEXT: .LBB15_3: -+; LA64-NEXT: beqz $a4, .LBB19_1 -+; LA64-NEXT: b .LBB19_4 -+; LA64-NEXT: .LBB19_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB15_4: -+; LA64-NEXT: .LBB19_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic - ret void -@@ -461,19 +566,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun - ; LA64-NEXT: andi $a1, $a1, 255 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB16_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB20_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB16_1 --; LA64-NEXT: b .LBB16_4 --; LA64-NEXT: .LBB16_3: -+; LA64-NEXT: beqz $a6, .LBB20_1 -+; LA64-NEXT: b .LBB20_4 -+; LA64-NEXT: .LBB20_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB16_4: -+; LA64-NEXT: .LBB20_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic -@@ -497,19 +602,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 - ; LA64-NEXT: sll.w $a1, $a1, $a0 - ; LA64-NEXT: addi.w $a1, $a1, 0 --; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a4 --; LA64-NEXT: bne $a6, $a1, .LBB17_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB21_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a4 - ; LA64-NEXT: or $a6, $a6, $a2 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB17_1 --; LA64-NEXT: b .LBB17_4 --; LA64-NEXT: .LBB17_3: -+; LA64-NEXT: beqz $a6, .LBB21_1 -+; LA64-NEXT: b .LBB21_4 -+; LA64-NEXT: .LBB21_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB17_4: -+; LA64-NEXT: .LBB21_4: - ; LA64-NEXT: srl.w $a0, $a5, $a0 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic -@@ -520,17 +625,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB18_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB18_1 --; LA64-NEXT: b .LBB18_4 --; LA64-NEXT: .LBB18_3: -+; LA64-NEXT: beqz $a4, .LBB22_1 -+; LA64-NEXT: b .LBB22_4 -+; LA64-NEXT: .LBB22_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB18_4: -+; LA64-NEXT: .LBB22_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -@@ -541,17 +646,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) - define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB19_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB23_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB19_1 --; LA64-NEXT: b .LBB19_4 --; LA64-NEXT: .LBB19_3: -+; LA64-NEXT: beqz $a4, .LBB23_1 -+; LA64-NEXT: b .LBB23_4 -+; LA64-NEXT: .LBB23_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB19_4: -+; LA64-NEXT: .LBB23_4: - ; LA64-NEXT: move $a0, $a3 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic -@@ -574,19 +679,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB20_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB24_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB20_1 --; LA64-NEXT: b .LBB20_4 --; LA64-NEXT: .LBB20_3: -+; LA64-NEXT: beqz $a6, .LBB24_1 -+; LA64-NEXT: b .LBB24_4 -+; LA64-NEXT: .LBB24_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB20_4: -+; LA64-NEXT: .LBB24_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -613,19 +718,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: addi.w $a2, $a4, 0 --; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a5, $a3, 0 - ; LA64-NEXT: and $a6, $a5, $a2 --; LA64-NEXT: bne $a6, $a1, .LBB21_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -+; LA64-NEXT: bne $a6, $a1, .LBB25_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 - ; LA64-NEXT: andn $a6, $a5, $a2 - ; LA64-NEXT: or $a6, $a6, $a0 - ; LA64-NEXT: sc.w $a6, $a3, 0 --; LA64-NEXT: beqz $a6, .LBB21_1 --; LA64-NEXT: b .LBB21_4 --; LA64-NEXT: .LBB21_3: -+; LA64-NEXT: beqz $a6, .LBB25_1 -+; LA64-NEXT: b .LBB25_4 -+; LA64-NEXT: .LBB25_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB21_4: -+; LA64-NEXT: .LBB25_4: - ; LA64-NEXT: and $a0, $a5, $a4 - ; LA64-NEXT: addi.w $a0, $a0, 0 - ; LA64-NEXT: xor $a0, $a1, $a0 -@@ -639,17 +744,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB22_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB26_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB22_1 --; LA64-NEXT: b .LBB22_4 --; LA64-NEXT: .LBB22_3: -+; LA64-NEXT: beqz $a4, .LBB26_1 -+; LA64-NEXT: b .LBB26_4 -+; LA64-NEXT: .LBB26_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB22_4: -+; LA64-NEXT: .LBB26_4: - ; LA64-NEXT: addi.w $a0, $a1, 0 - ; LA64-NEXT: xor $a0, $a3, $a0 - ; LA64-NEXT: sltui $a0, $a0, 1 -@@ -662,17 +767,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n - define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { - ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: - ; LA64: # %bb.0: --; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -+; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.d $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB23_3 --; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -+; LA64-NEXT: bne $a3, $a1, .LBB27_3 -+; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.d $a4, $a0, 0 --; LA64-NEXT: beqz $a4, .LBB23_1 --; LA64-NEXT: b .LBB23_4 --; LA64-NEXT: .LBB23_3: -+; LA64-NEXT: beqz $a4, .LBB27_1 -+; LA64-NEXT: b .LBB27_4 -+; LA64-NEXT: .LBB27_3: - ; LA64-NEXT: dbar 1792 --; LA64-NEXT: .LBB23_4: -+; LA64-NEXT: .LBB27_4: - ; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret --- -2.20.1 - - -From 331674f3553b747d9869276ae34667dce7099a09 Mon Sep 17 00:00:00 2001 -From: Lu Weining <90239436+SixWeining@users.noreply.github.com> -Date: Thu, 19 Oct 2023 09:21:51 +0800 -Subject: [PATCH 5/7] [LoongArch] Improve codegen for atomic cmpxchg ops - (#69339) - -PR #67391 improved atomic codegen by handling memory ordering specified -by the `cmpxchg` instruction. An acquire barrier needs to be generated -when memory ordering includes an acquire operation. This PR improves the -codegen further by only handling the failure ordering. - -(cherry picked from commit 78abc45c44cdadf76b30e1f3dc24936bb5627d68) ---- - .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- - .../LoongArch/LoongArchISelLowering.cpp | 7 ++- - .../Target/LoongArch/LoongArchInstrInfo.td | 55 ++++++++++++++++--- - .../ir-instruction/atomic-cmpxchg.ll | 8 +-- - 4 files changed, 56 insertions(+), 18 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -index b348cb56c136..18a532b55ee5 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp -@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); - } - -- AtomicOrdering Ordering = -+ AtomicOrdering FailureOrdering = - static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); - int hint; - -- switch (Ordering) { -+ switch (FailureOrdering) { - case AtomicOrdering::Acquire: - case AtomicOrdering::AcquireRelease: - case AtomicOrdering::SequentiallyConsistent: -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 33a3197013cc..99328f09921f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( - Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( - IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, - Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { -- Value *Ordering = -- Builder.getIntN(Subtarget.getGRLen(), static_cast(Ord)); -+ AtomicOrdering FailOrd = CI->getFailureOrdering(); -+ Value *FailureOrdering = -+ Builder.getIntN(Subtarget.getGRLen(), static_cast(FailOrd)); - - // TODO: Support cmpxchg on LA32. - Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; -@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( - Function *MaskedCmpXchg = - Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); - Value *Result = Builder.CreateCall( -- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); -+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); - Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); - return Result; - } -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index fcbd314507a5..ab1890556814 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; - - class PseudoCmpXchg - : Pseudo<(outs GPR:$res, GPR:$scratch), -- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { -+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; - def PseudoMaskedCmpXchg32 - : Pseudo<(outs GPR:$res, GPR:$scratch), - (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, -- grlenimm:$ordering)> { -+ grlenimm:$fail_order)> { - let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; - let mayLoad = 1; - let mayStore = 1; -@@ -1785,6 +1785,43 @@ class AtomicPat - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), - (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; - -+// These atomic cmpxchg PatFrags only care about the failure ordering. -+// The PatFrags defined by multiclass `ternary_atomic_op_ord` in -+// TargetSelectionDAG.td care about the merged memory ordering that is the -+// stronger one between success and failure. But for LoongArch LL-SC we only -+// need to care about the failure ordering as explained in PR #67391. So we -+// define these PatFrags that will be used to define cmpxchg pats below. -+multiclass ternary_atomic_op_failure_ord { -+ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Monotonic; -+ }]>; -+ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Acquire; -+ }]>; -+ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::Release; -+ }]>; -+ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::AcquireRelease; -+ }]>; -+ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), -+ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ -+ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); -+ return Ordering == AtomicOrdering::SequentiallyConsistent; -+ }]>; -+} -+ -+defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; -+defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; -+ - let Predicates = [IsLA64] in { - def : AtomicPat; -@@ -1847,24 +1884,24 @@ def : AtomicPat { -- def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; -- def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; -- def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; -- def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; -- def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), -+ def : Pat<(vt (!cast(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), - (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; - } - - defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; - defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_loongarch_masked_cmpxchg_i64 -- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), -+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order), - (PseudoMaskedCmpXchg32 -- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; -+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>; - - def : PseudoMaskedAMMinMaxPat; -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 174bb9d0ff7d..1dd3f39852d8 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { - ; LA64-NEXT: beqz $a5, .LBB4_1 - ; LA64-NEXT: b .LBB4_4 - ; LA64-NEXT: .LBB4_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB4_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic -@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin - ; LA64-NEXT: beqz $a5, .LBB5_1 - ; LA64-NEXT: b .LBB5_4 - ; LA64-NEXT: .LBB5_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB5_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic -@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin - ; LA64-NEXT: beqz $a4, .LBB6_1 - ; LA64-NEXT: b .LBB6_4 - ; LA64-NEXT: .LBB6_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB6_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic -@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin - ; LA64-NEXT: beqz $a4, .LBB7_1 - ; LA64-NEXT: b .LBB7_4 - ; LA64-NEXT: .LBB7_3: --; LA64-NEXT: dbar 20 -+; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB7_4: - ; LA64-NEXT: ret - %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic --- -2.20.1 - - -From d8479f9765b12a84d1756aedebf631fdbe4f0451 Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Mon, 4 Mar 2024 08:38:52 +0800 -Subject: [PATCH 6/7] [LoongArch] Override - LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) - -This patch aims to solve Firefox issue: -https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 - -Similar to 616289ed2922. Currently LoongArch uses an ll.[wd]/sc.[wd] -loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is -full-width (i.e. the `bne` instruction), we must sign extend the input -comparsion argument. - -Note that LoongArch ISA manual V1.1 has introduced compare-and-swap -instructions. We would change the implementation (return `ANY_EXTEND`) -when we support them. - -(cherry picked from commit 5f058aa211995d2f0df2a0e063532832569cb7a8) -(cherry picked from commit ea6c457b8dd2d0e6a7f05b4a5bdd2686085e1ec0) ---- - .../LoongArch/LoongArchISelLowering.cpp | 5 + - .../Target/LoongArch/LoongArchISelLowering.h | 2 + - .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 120 +++++++------ - .../ir-instruction/atomic-cmpxchg.ll | 25 +-- - .../LoongArch/ir-instruction/atomicrmw-fp.ll | 160 +++++++++--------- - 5 files changed, 159 insertions(+), 153 deletions(-) - -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index 99328f09921f..4fc2b4709840 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -4893,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { - - return !isa(Y); - } -+ -+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { -+ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. -+ return ISD::SIGN_EXTEND; -+} -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 23b90640a690..2c9826a13237 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -203,6 +203,8 @@ public: - return ISD::SIGN_EXTEND; - } - -+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override; -+ - Register getRegisterByName(const char *RegName, LLT VT, - const MachineFunction &MF) const override; - bool mayBeEmittedAsTailCall(const CallInst *CI) const override; -diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -index d8908acbc945..f0baf19bcf0e 100644 ---- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll -@@ -26,15 +26,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: andi $a5, $a5, 255 - ; LA64-NEXT: sll.w $a5, $a5, $a0 - ; LA64-NEXT: and $a6, $a3, $a4 --; LA64-NEXT: or $a6, $a6, $a5 -+; LA64-NEXT: or $a5, $a6, $a5 -+; LA64-NEXT: addi.w $a6, $a3, 0 - ; LA64-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a5, $a2, 0 --; LA64-NEXT: bne $a5, $a3, .LBB0_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a6, .LBB0_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 --; LA64-NEXT: move $a7, $a6 -+; LA64-NEXT: move $a7, $a5 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB0_3 - ; LA64-NEXT: b .LBB0_6 -@@ -43,11 +44,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 --; LA64-NEXT: addi.w $a6, $a3, 0 --; LA64-NEXT: move $a3, $a5 --; LA64-NEXT: bne $a5, $a6, .LBB0_1 -+; LA64-NEXT: bne $a3, $a6, .LBB0_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst - ret i8 %result -@@ -79,15 +78,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 - ; LA64-NEXT: sll.w $a5, $a5, $a0 - ; LA64-NEXT: and $a6, $a3, $a4 --; LA64-NEXT: or $a6, $a6, $a5 -+; LA64-NEXT: or $a5, $a6, $a5 -+; LA64-NEXT: addi.w $a6, $a3, 0 - ; LA64-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a5, $a2, 0 --; LA64-NEXT: bne $a5, $a3, .LBB1_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a6, .LBB1_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 --; LA64-NEXT: move $a7, $a6 -+; LA64-NEXT: move $a7, $a5 - ; LA64-NEXT: sc.w $a7, $a2, 0 - ; LA64-NEXT: beqz $a7, .LBB1_3 - ; LA64-NEXT: b .LBB1_6 -@@ -96,11 +96,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 --; LA64-NEXT: addi.w $a6, $a3, 0 --; LA64-NEXT: move $a3, $a5 --; LA64-NEXT: bne $a5, $a6, .LBB1_1 -+; LA64-NEXT: bne $a3, $a6, .LBB1_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a5, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst - ret i16 %result -@@ -109,37 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { - define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-LABEL: atomicrmw_uinc_wrap_i32: - ; LA64: # %bb.0: --; LA64-NEXT: ld.w $a3, $a0, 0 --; LA64-NEXT: addi.w $a2, $a1, 0 -+; LA64-NEXT: ld.w $a2, $a0, 0 -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .p2align 4, , 16 - ; LA64-NEXT: .LBB2_1: # %atomicrmw.start - ; LA64-NEXT: # =>This Loop Header: Depth=1 - ; LA64-NEXT: # Child Loop BB2_3 Depth 2 --; LA64-NEXT: addi.w $a4, $a3, 0 --; LA64-NEXT: sltu $a1, $a4, $a2 --; LA64-NEXT: xori $a1, $a1, 1 --; LA64-NEXT: addi.d $a5, $a3, 1 --; LA64-NEXT: masknez $a5, $a5, $a1 -+; LA64-NEXT: addi.w $a3, $a2, 0 -+; LA64-NEXT: sltu $a4, $a3, $a1 -+; LA64-NEXT: xori $a4, $a4, 1 -+; LA64-NEXT: addi.d $a2, $a2, 1 -+; LA64-NEXT: masknez $a4, $a2, $a4 - ; LA64-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a1, $a0, 0 --; LA64-NEXT: bne $a1, $a3, .LBB2_5 -+; LA64-NEXT: ll.w $a2, $a0, 0 -+; LA64-NEXT: bne $a2, $a3, .LBB2_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 --; LA64-NEXT: move $a6, $a5 --; LA64-NEXT: sc.w $a6, $a0, 0 --; LA64-NEXT: beqz $a6, .LBB2_3 -+; LA64-NEXT: move $a5, $a4 -+; LA64-NEXT: sc.w $a5, $a0, 0 -+; LA64-NEXT: beqz $a5, .LBB2_3 - ; LA64-NEXT: b .LBB2_6 - ; LA64-NEXT: .LBB2_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 --; LA64-NEXT: move $a3, $a1 --; LA64-NEXT: bne $a1, $a4, .LBB2_1 -+; LA64-NEXT: bne $a2, $a3, .LBB2_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: move $a0, $a1 -+; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret - %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst - ret i32 %result -@@ -212,15 +209,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: andi $a6, $a6, 255 - ; LA64-NEXT: sll.w $a6, $a6, $a0 - ; LA64-NEXT: and $a7, $a3, $a4 --; LA64-NEXT: or $a7, $a7, $a6 -+; LA64-NEXT: or $a6, $a7, $a6 -+; LA64-NEXT: addi.w $a7, $a3, 0 - ; LA64-NEXT: .LBB4_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a6, $a2, 0 --; LA64-NEXT: bne $a6, $a3, .LBB4_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a7, .LBB4_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 --; LA64-NEXT: move $t0, $a7 -+; LA64-NEXT: move $t0, $a6 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB4_3 - ; LA64-NEXT: b .LBB4_6 -@@ -229,11 +227,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB4_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 --; LA64-NEXT: addi.w $a7, $a3, 0 --; LA64-NEXT: move $a3, $a6 --; LA64-NEXT: bne $a6, $a7, .LBB4_1 -+; LA64-NEXT: bne $a3, $a7, .LBB4_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a6, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst - ret i8 %result -@@ -270,15 +266,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 - ; LA64-NEXT: sll.w $a6, $a6, $a0 - ; LA64-NEXT: and $a7, $a3, $a4 --; LA64-NEXT: or $a7, $a7, $a6 -+; LA64-NEXT: or $a6, $a7, $a6 -+; LA64-NEXT: addi.w $a7, $a3, 0 - ; LA64-NEXT: .LBB5_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 --; LA64-NEXT: ll.w $a6, $a2, 0 --; LA64-NEXT: bne $a6, $a3, .LBB5_5 -+; LA64-NEXT: ll.w $a3, $a2, 0 -+; LA64-NEXT: bne $a3, $a7, .LBB5_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 --; LA64-NEXT: move $t0, $a7 -+; LA64-NEXT: move $t0, $a6 - ; LA64-NEXT: sc.w $t0, $a2, 0 - ; LA64-NEXT: beqz $t0, .LBB5_3 - ; LA64-NEXT: b .LBB5_6 -@@ -287,11 +284,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB5_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 --; LA64-NEXT: addi.w $a7, $a3, 0 --; LA64-NEXT: move $a3, $a6 --; LA64-NEXT: bne $a6, $a7, .LBB5_1 -+; LA64-NEXT: bne $a3, $a7, .LBB5_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end --; LA64-NEXT: srl.w $a0, $a6, $a0 -+; LA64-NEXT: srl.w $a0, $a3, $a0 - ; LA64-NEXT: ret - %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst - ret i16 %result -@@ -300,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { - define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-LABEL: atomicrmw_udec_wrap_i32: - ; LA64: # %bb.0: --; LA64-NEXT: ld.w $a4, $a0, 0 -+; LA64-NEXT: ld.w $a2, $a0, 0 - ; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .p2align 4, , 16 - ; LA64-NEXT: .LBB6_1: # %atomicrmw.start - ; LA64-NEXT: # =>This Loop Header: Depth=1 - ; LA64-NEXT: # Child Loop BB6_3 Depth 2 --; LA64-NEXT: addi.w $a5, $a4, 0 --; LA64-NEXT: sltu $a2, $a3, $a5 --; LA64-NEXT: addi.d $a6, $a4, -1 --; LA64-NEXT: masknez $a6, $a6, $a2 --; LA64-NEXT: maskeqz $a2, $a1, $a2 --; LA64-NEXT: or $a2, $a2, $a6 --; LA64-NEXT: sltui $a6, $a5, 1 --; LA64-NEXT: masknez $a2, $a2, $a6 --; LA64-NEXT: maskeqz $a6, $a1, $a6 --; LA64-NEXT: or $a6, $a6, $a2 -+; LA64-NEXT: addi.w $a4, $a2, 0 -+; LA64-NEXT: sltu $a5, $a3, $a4 -+; LA64-NEXT: addi.d $a2, $a2, -1 -+; LA64-NEXT: masknez $a2, $a2, $a5 -+; LA64-NEXT: maskeqz $a5, $a1, $a5 -+; LA64-NEXT: or $a2, $a5, $a2 -+; LA64-NEXT: sltui $a5, $a4, 1 -+; LA64-NEXT: masknez $a2, $a2, $a5 -+; LA64-NEXT: maskeqz $a5, $a1, $a5 -+; LA64-NEXT: or $a5, $a5, $a2 - ; LA64-NEXT: .LBB6_3: # %atomicrmw.start - ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 - ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -@@ -323,17 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { - ; LA64-NEXT: bne $a2, $a4, .LBB6_5 - ; LA64-NEXT: # %bb.4: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 --; LA64-NEXT: move $a7, $a6 --; LA64-NEXT: sc.w $a7, $a0, 0 --; LA64-NEXT: beqz $a7, .LBB6_3 -+; LA64-NEXT: move $a6, $a5 -+; LA64-NEXT: sc.w $a6, $a0, 0 -+; LA64-NEXT: beqz $a6, .LBB6_3 - ; LA64-NEXT: b .LBB6_6 - ; LA64-NEXT: .LBB6_5: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB6_6: # %atomicrmw.start - ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 --; LA64-NEXT: move $a4, $a2 --; LA64-NEXT: bne $a2, $a5, .LBB6_1 -+; LA64-NEXT: bne $a2, $a4, .LBB6_1 - ; LA64-NEXT: # %bb.2: # %atomicrmw.end - ; LA64-NEXT: move $a0, $a2 - ; LA64-NEXT: ret -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -index 1dd3f39852d8..ebb09640e6c9 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll -@@ -71,6 +71,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind - define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB2_3 -@@ -176,6 +177,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin - define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB6_3 -@@ -285,9 +287,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou - define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB10_3 -+; LA64-NEXT: ll.w $a1, $a0, 0 -+; LA64-NEXT: bne $a1, $a3, .LBB10_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 -@@ -296,7 +299,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou - ; LA64-NEXT: .LBB10_3: - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB10_4: --; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: move $a0, $a1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire - %res = extractvalue { i32, i1 } %tmp, 0 -@@ -404,6 +407,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw - define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB14_3 -@@ -415,8 +419,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw - ; LA64-NEXT: .LBB14_3: - ; LA64-NEXT: dbar 20 - ; LA64-NEXT: .LBB14_4: --; LA64-NEXT: addi.w $a0, $a1, 0 --; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire -@@ -516,6 +519,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw - define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB18_3 -@@ -625,9 +629,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) - define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a3, $a1, 0 - ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 --; LA64-NEXT: ll.w $a3, $a0, 0 --; LA64-NEXT: bne $a3, $a1, .LBB22_3 -+; LA64-NEXT: ll.w $a1, $a0, 0 -+; LA64-NEXT: bne $a1, $a3, .LBB22_3 - ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 - ; LA64-NEXT: move $a4, $a2 - ; LA64-NEXT: sc.w $a4, $a0, 0 -@@ -636,7 +641,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) - ; LA64-NEXT: .LBB22_3: - ; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB22_4: --; LA64-NEXT: move $a0, $a3 -+; LA64-NEXT: move $a0, $a1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic - %res = extractvalue { i32, i1 } %tmp, 0 -@@ -744,6 +749,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n - define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { - ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: - ; LA64: # %bb.0: -+; LA64-NEXT: addi.w $a1, $a1, 0 - ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 - ; LA64-NEXT: ll.w $a3, $a0, 0 - ; LA64-NEXT: bne $a3, $a1, .LBB26_3 -@@ -755,8 +761,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n - ; LA64-NEXT: .LBB26_3: - ; LA64-NEXT: dbar 1792 - ; LA64-NEXT: .LBB26_4: --; LA64-NEXT: addi.w $a0, $a1, 0 --; LA64-NEXT: xor $a0, $a3, $a0 -+; LA64-NEXT: xor $a0, $a3, $a1 - ; LA64-NEXT: sltui $a0, $a0, 1 - ; LA64-NEXT: ret - %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 589360823b14..4d8160d70803 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB0_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB0_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB0_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB0_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 -@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB1_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB1_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB1_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB1_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 -@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB2_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB2_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -201,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB2_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB2_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 -@@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB3_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB3_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB3_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB3_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 -@@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB8_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB8_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB8_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 -@@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB9_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB9_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB9_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 -@@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB10_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB10_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB10_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 -@@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB11_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB11_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { - ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB11_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB11_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 -@@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1389,8 +1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB16_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB16_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB16_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 -@@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB17_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB17_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB17_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 -@@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB18_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB18_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB18_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 -@@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB19_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -1650,8 +1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { - ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB19_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB19_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 -@@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB24_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2126,8 +2127,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB24_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB24_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 -@@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB25_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB25_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB25_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 -@@ -2223,6 +2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB26_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB26_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB26_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 -@@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB27_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { - ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB27_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB27_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 -@@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB32_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB32_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB32_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB32_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 -@@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB33_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB33_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB33_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 -@@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB34_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -2937,6 +2937,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB34_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB34_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 -@@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64F-NEXT: movfr2gr.s $a1, $fa2 - ; LA64F-NEXT: movfr2gr.s $a2, $fa0 -+; LA64F-NEXT: addi.w $a2, $a2, 0 - ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start - ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 - ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 -@@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start - ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 - ; LA64F-NEXT: movgr2fr.w $fa0, $a3 --; LA64F-NEXT: addi.w $a1, $a2, 0 --; LA64F-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64F-NEXT: bne $a3, $a2, .LBB35_1 - ; LA64F-NEXT: # %bb.2: # %atomicrmw.end - ; LA64F-NEXT: ret - ; -@@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 - ; LA64D-NEXT: movfr2gr.s $a1, $fa2 - ; LA64D-NEXT: movfr2gr.s $a2, $fa0 -+; LA64D-NEXT: addi.w $a2, $a2, 0 - ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start - ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 - ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 -@@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { - ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start - ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 - ; LA64D-NEXT: movgr2fr.w $fa0, $a3 --; LA64D-NEXT: addi.w $a1, $a2, 0 --; LA64D-NEXT: bne $a3, $a1, .LBB35_1 -+; LA64D-NEXT: bne $a3, $a2, .LBB35_1 - ; LA64D-NEXT: # %bb.2: # %atomicrmw.end - ; LA64D-NEXT: ret - %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 --- -2.20.1 - - -From 4713504e1495a3831ba1a47078de83db3a8a533c Mon Sep 17 00:00:00 2001 -From: Lu Weining -Date: Thu, 19 Oct 2023 10:16:13 +0800 -Subject: [PATCH 7/7] [Memory] Call __clear_cache in InvalidateInstructionCache - on LoongArch (#67285) - -As the comments of `InvalidateInstructionCache`: Before the JIT can run -a block of code that has been emitted it must invalidate the instruction -cache on some platforms. I think it applies to LoongArch as LoongArch -has a weak memory-model. But I'm not able to write a test to demonstrate -this issue. Perhaps self-modifing code should be wrote? - -(cherry picked from commit fb366581e7d67df7d9a98605fd65a7e7908451e7) ---- - llvm/lib/Support/Unix/Memory.inc | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc -index 4c8f6b2ea7d3..69bd1164343d 100644 ---- a/llvm/lib/Support/Unix/Memory.inc -+++ b/llvm/lib/Support/Unix/Memory.inc -@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) { - for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) - asm volatile("icbi 0, %0" : : "r"(Line)); - asm volatile("isync"); --#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ -+#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ -+ defined(__mips__)) && \ - defined(__GNUC__) - // FIXME: Can we safely always call this for __GNUC__ everywhere? - const char *Start = static_cast(Addr); --- -2.20.1 - diff --git a/0020-Update-llvm-lit-config-to-support-build_for_openeule.patch b/0020-Update-llvm-lit-config-to-support-build_for_openeule.patch deleted file mode 100644 index 48a4ff5bca39424a8165a54b02ba8382a5bdf36f..0000000000000000000000000000000000000000 --- a/0020-Update-llvm-lit-config-to-support-build_for_openeule.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 5721be433ddee5f60d4a9434df43a023f1ec4c0e Mon Sep 17 00:00:00 2001 -From: wangqiang -Date: Sun, 28 Apr 2024 14:30:34 +0800 -Subject: [PATCH] Update llvm-lit config to support build_for_openeuler - ---- - llvm/cmake/modules/HandleLLVMOptions.cmake | 7 +++++++ - 1 files changed, 7 insertions(+) - -diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake -index 76723be69..c6f5569af 100644 ---- a/llvm/cmake/modules/HandleLLVMOptions.cmake -+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake -@@ -97,6 +97,13 @@ if( LLVM_ENABLE_ASSERTIONS ) - set(LLVM_ENABLE_CLASSIC_FLANG 0) - endif() - -+option(BUILD_FOR_OPENEULER "Build support for openeuler" OFF) -+if(BUILD_FOR_OPENEULER) -+ set(BUILD_FOR_OPENEULER 1) -+else() -+ set(BUILD_FOR_OPENEULER 0) -+endif() -+ - if(LLVM_ENABLE_EXPENSIVE_CHECKS) - add_compile_definitions(EXPENSIVE_CHECKS) - --- -2.33.0 - diff --git a/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch b/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch deleted file mode 100644 index ea61cb374fb876873228fdb42eba8562cce3f68a..0000000000000000000000000000000000000000 --- a/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch +++ /dev/null @@ -1,9915 +0,0 @@ -From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001 -From: Muhammad Asif Manzoor -Date: Thu, 21 Mar 2024 12:50:38 -0400 -Subject: Add BiSheng Autotuner support for LLVM compiler - -Automatic tuning is an automatic iterative process that optimizes a given -program by manipulating compilation options for optimal performance. -BiSheng Autotuner provides a resumable interface for tuning process. BiSheng -Autotuner can tune 1) individual code segments/blocks (fine grain turning) like -loops, callsites, instructions, etc. and 2) entire modules/programs (coarse -grain tuning) for compiler flags, pass ordering, etc. -This patch enables LLVM compiler to extract tuneable code regions and then apply -suggested configuration (by Autotuner) to find out the optimal configurations. ---- - llvm/cmake/modules/CrossCompile.cmake | 1 + - llvm/cmake/modules/HandleLLVMOptions.cmake | 8 + - llvm/include/llvm/Analysis/AutotuningDump.h | 75 ++ - llvm/include/llvm/Analysis/LoopInfo.h | 13 + - llvm/include/llvm/Analysis/Passes.h | 10 + - llvm/include/llvm/AutoTuner/AutoTuning.h | 486 ++++++++++++ - .../llvm/AutoTuner/AutoTuningRemarkManager.h | 43 ++ - .../llvm/AutoTuner/AutoTuningRemarkStreamer.h | 47 ++ - llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 + - llvm/include/llvm/IR/Function.h | 37 + - llvm/include/llvm/IR/InstrTypes.h | 24 + - llvm/include/llvm/IR/Instructions.h | 24 + - llvm/include/llvm/IR/Module.h | 3 + - llvm/include/llvm/IR/StructuralHash.h | 14 + - llvm/include/llvm/InitializePasses.h | 5 + - llvm/include/llvm/LinkAllPasses.h | 8 + - llvm/include/llvm/Remarks/Remark.h | 32 + - llvm/include/llvm/Support/CommandLine.h | 17 + - llvm/include/llvm/Transforms/Scalar.h | 17 + - .../Transforms/Scalar/AutoTuningCompile.h | 170 +++++ - .../llvm/Transforms/Utils/UnrollLoop.h | 4 + - llvm/lib/Analysis/AutotuningDump.cpp | 265 +++++++ - llvm/lib/Analysis/CMakeLists.txt | 2 + - llvm/lib/Analysis/InlineAdvisor.cpp | 18 + - llvm/lib/Analysis/InlineCost.cpp | 29 + - llvm/lib/Analysis/LoopInfo.cpp | 52 ++ - llvm/lib/AutoTuner/AutoTuning.cpp | 705 ++++++++++++++++++ - .../lib/AutoTuner/AutoTuningRemarkManager.cpp | 299 ++++++++ - .../AutoTuner/AutoTuningRemarkStreamer.cpp | 55 ++ - llvm/lib/AutoTuner/CMakeLists.txt | 11 + - llvm/lib/CMakeLists.txt | 1 + - llvm/lib/CodeGen/CMakeLists.txt | 1 + - llvm/lib/CodeGen/CalcSpillWeights.cpp | 30 + - llvm/lib/CodeGen/MachineBasicBlock.cpp | 36 + - llvm/lib/CodeGen/MachineScheduler.cpp | 44 ++ - llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 19 + - llvm/lib/IR/AsmWriter.cpp | 151 ++++ - llvm/lib/IR/CMakeLists.txt | 1 + - llvm/lib/IR/Function.cpp | 34 + - llvm/lib/IR/Instructions.cpp | 86 +++ - llvm/lib/IR/StructuralHash.cpp | 114 +++ - llvm/lib/Passes/PassBuilder.cpp | 5 + - llvm/lib/Passes/PassBuilderPipelines.cpp | 46 ++ - llvm/lib/Passes/PassRegistry.def | 13 + - llvm/lib/Passes/StandardInstrumentations.cpp | 23 + - .../lib/Remarks/BitstreamRemarkSerializer.cpp | 8 + - llvm/lib/Remarks/RemarkStreamer.cpp | 4 + - llvm/lib/Remarks/YAMLRemarkParser.cpp | 122 +++ - llvm/lib/Remarks/YAMLRemarkParser.h | 6 + - llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 84 +++ - llvm/lib/Support/CommandLine.cpp | 41 + - llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + - llvm/lib/Transforms/IPO/Inliner.cpp | 36 + - llvm/lib/Transforms/IPO/SampleProfile.cpp | 14 + - .../Transforms/Instrumentation/CMakeLists.txt | 1 + - .../Instrumentation/PGOInstrumentation.cpp | 8 + - .../Transforms/Scalar/AutoTuningCompile.cpp | 334 +++++++++ - llvm/lib/Transforms/Scalar/CMakeLists.txt | 2 + - llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 187 +++++ - llvm/lib/Transforms/Scalar/Scalar.cpp | 4 + - llvm/lib/Transforms/Scalar/Sink.cpp | 5 + - llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + - llvm/lib/Transforms/Utils/LCSSA.cpp | 5 + - llvm/lib/Transforms/Utils/LoopSimplify.cpp | 8 + - llvm/lib/Transforms/Utils/LoopUnroll.cpp | 3 + - llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + - .../Vectorize/LoopVectorizationLegality.cpp | 12 + - .../Transforms/Vectorize/LoopVectorize.cpp | 34 + - .../Inputs/unroll_template.yaml | 8 + - .../AutotuningDump/create-data-dir.ll | 65 ++ - llvm/test/AutoTuning/AutotuningDump/unroll.ll | 35 + - .../autotune_datadir/baseline_config.yaml | 9 + - .../autotune_datadir/random_config.yaml | 9 + - .../AutoTuning/BaselineConfig/Inputs/test.ll | 117 +++ - .../BaselineConfig/apply_baseline_config.ll | 11 + - llvm/test/AutoTuning/BaselineConfig/opp.ll | 67 ++ - .../CodeRegionFilter/function-filtering.ll | 62 ++ - .../Error/Inputs/invalid-format.yaml | 3 + - .../AutoTuning/Error/Inputs/template.yaml | 10 + - .../AutoTuning/Error/file-not-found-error.ll | 29 + - .../AutoTuning/Error/invalid-yaml-error.ll | 27 + - .../AutoTuning/Error/malformed-input-error.ll | 136 ++++ - llvm/test/AutoTuning/Error/output-error.ll | 28 + - llvm/test/AutoTuning/Error/valid-input.ll | 27 + - .../Inputs/template.yaml | 9 + - .../inc-compile-parse-input.ll | 103 +++ - .../AutoTuning/Inline/Inputs/template.yaml | 9 + - .../Inline/Inputs/template_no_metadata.yaml | 7 + - .../test/AutoTuning/Inline/duplicate-calls.ll | 96 +++ - llvm/test/AutoTuning/Inline/force-inline.ll | 84 +++ - .../AutoTuning/Inline/inline-attribute.ll | 85 +++ - llvm/test/AutoTuning/Inline/opp.ll | 64 ++ - .../LoopUnroll/Inputs/debug_loc_template.yaml | 10 + - .../LoopUnroll/Inputs/loop_nest.yaml | 10 + - .../LoopUnroll/Inputs/loop_peel.yaml | 9 + - .../Inputs/unroll_raw_template.yaml | 10 + - .../LoopUnroll/Inputs/unroll_template.yaml | 10 + - .../Inputs/unroll_template_no_metadata.yaml | 8 + - llvm/test/AutoTuning/LoopUnroll/debug_loc.ll | 161 ++++ - .../AutoTuning/LoopUnroll/dynamic_config.ll | 56 ++ - llvm/test/AutoTuning/LoopUnroll/loop_nest.ll | 136 ++++ - llvm/test/AutoTuning/LoopUnroll/loop_peel.ll | 53 ++ - .../AutoTuning/LoopUnroll/unroll-pragma.ll | 129 ++++ - llvm/test/AutoTuning/LoopUnroll/unroll.ll | 101 +++ - llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll | 113 +++ - .../Inputs/vectorize_template.yaml | 9 + - .../vectorize_template_no_metadata.yaml | 7 + - .../LoopVectorize/force-vector-interleave.ll | 88 +++ - .../Inputs/misched_x86_template.yaml | 10 + - .../misched_x86_bidirectional.ll | 73 ++ - .../MachineScheduler/misched_x86_bottomup.ll | 72 ++ - .../MachineScheduler/misched_x86_topdown.ll | 72 ++ - .../AutoTuning/MetaData/structural_hash.ll | 234 ++++++ - .../AutoTuning/MetaData/write_no_metadata.ll | 191 +++++ - .../MetaData/write_with_metadata.ll | 204 +++++ - .../AutoTuning/PGO/Inputs/pgo-instr.proftext | 17 + - .../PGO/Inputs/pgo-sample-cold.prof | 7 + - .../AutoTuning/PGO/Inputs/pgo-sample-hot.prof | 7 + - llvm/test/AutoTuning/PGO/pgo-instr-filters.ll | 61 ++ - .../test/AutoTuning/PGO/pgo-sample-filters.ll | 138 ++++ - .../Inputs/pass_invocation.yaml | 10 + - .../PassInvocation/pass_invocation_read.ll | 64 ++ - .../PassInvocation/pass_invocation_write.ll | 67 ++ - .../PhaseOrdering/Inputs/template.yaml | 8 + - .../AutoTuning/PhaseOrdering/pass-order.ll | 65 ++ - .../AutoTuning/SwitchLowering/switch-opp.ll | 47 ++ - llvm/test/AutoTuning/lit.local.cfg | 2 + - llvm/test/AutoTuning/opt-opp.ll | 315 ++++++++ - llvm/test/lit.site.cfg.py.in | 1 + - llvm/tools/llc/llc.cpp | 19 + - llvm/tools/opt/NewPMDriver.cpp | 42 ++ - llvm/tools/opt/opt.cpp | 53 ++ - 132 files changed, 7801 insertions(+) - create mode 100644 llvm/include/llvm/Analysis/AutotuningDump.h - create mode 100644 llvm/include/llvm/AutoTuner/AutoTuning.h - create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h - create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h - create mode 100644 llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h - create mode 100644 llvm/lib/Analysis/AutotuningDump.cpp - create mode 100644 llvm/lib/AutoTuner/AutoTuning.cpp - create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp - create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp - create mode 100644 llvm/lib/AutoTuner/CMakeLists.txt - create mode 100644 llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp - create mode 100644 llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml - create mode 100644 llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll - create mode 100644 llvm/test/AutoTuning/AutotuningDump/unroll.ll - create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml - create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml - create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll - create mode 100644 llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll - create mode 100644 llvm/test/AutoTuning/BaselineConfig/opp.ll - create mode 100644 llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll - create mode 100644 llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml - create mode 100644 llvm/test/AutoTuning/Error/Inputs/template.yaml - create mode 100644 llvm/test/AutoTuning/Error/file-not-found-error.ll - create mode 100644 llvm/test/AutoTuning/Error/invalid-yaml-error.ll - create mode 100644 llvm/test/AutoTuning/Error/malformed-input-error.ll - create mode 100644 llvm/test/AutoTuning/Error/output-error.ll - create mode 100644 llvm/test/AutoTuning/Error/valid-input.ll - create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml - create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll - create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template.yaml - create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml - create mode 100644 llvm/test/AutoTuning/Inline/duplicate-calls.ll - create mode 100644 llvm/test/AutoTuning/Inline/force-inline.ll - create mode 100644 llvm/test/AutoTuning/Inline/inline-attribute.ll - create mode 100644 llvm/test/AutoTuning/Inline/opp.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml - create mode 100644 llvm/test/AutoTuning/LoopUnroll/debug_loc.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_nest.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_peel.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll.ll - create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll - create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml - create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml - create mode 100644 llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll - create mode 100644 llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml - create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll - create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll - create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll - create mode 100644 llvm/test/AutoTuning/MetaData/structural_hash.ll - create mode 100644 llvm/test/AutoTuning/MetaData/write_no_metadata.ll - create mode 100644 llvm/test/AutoTuning/MetaData/write_with_metadata.ll - create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext - create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof - create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof - create mode 100644 llvm/test/AutoTuning/PGO/pgo-instr-filters.ll - create mode 100644 llvm/test/AutoTuning/PGO/pgo-sample-filters.ll - create mode 100644 llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml - create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll - create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll - create mode 100644 llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml - create mode 100644 llvm/test/AutoTuning/PhaseOrdering/pass-order.ll - create mode 100644 llvm/test/AutoTuning/SwitchLowering/switch-opp.ll - create mode 100644 llvm/test/AutoTuning/lit.local.cfg - create mode 100644 llvm/test/AutoTuning/opt-opp.ll - -diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake -index 6af47b51d4c6..1a9fb4b2dddc 100644 ---- a/llvm/cmake/modules/CrossCompile.cmake -+++ b/llvm/cmake/modules/CrossCompile.cmake -@@ -82,6 +82,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) - -DLLVM_ENABLE_PROJECTS="${llvm_enable_projects_arg}" - -DLLVM_EXTERNAL_PROJECTS="${llvm_external_projects_arg}" - -DLLVM_ENABLE_RUNTIMES="${llvm_enable_runtimes_arg}" -+ -DLLVM_ENABLE_AUTOTUNER="${LLVM_ENABLE_AUTOTUNER}" - ${external_project_source_dirs} - -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}" - -DLLVM_INCLUDE_BENCHMARKS=OFF -diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake -index 62a1a64d37d4..b8e9dbe29d88 100644 ---- a/llvm/cmake/modules/HandleLLVMOptions.cmake -+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake -@@ -112,6 +112,14 @@ else() - set(BUILD_FOR_OPENEULER 0) - endif() - -+option(LLVM_ENABLE_AUTOTUNER "Enable BiSheng Auto-Tuning features" OFF) -+if (LLVM_ENABLE_AUTOTUNER) -+ set(LLVM_ENABLE_AUTOTUNER 1) -+ add_definitions( -DENABLE_AUTOTUNER ) -+else() -+ set(LLVM_ENABLE_AUTOTUNER 0) -+endif() -+ - if(LLVM_ENABLE_EXPENSIVE_CHECKS) - add_compile_definitions(EXPENSIVE_CHECKS) - -diff --git a/llvm/include/llvm/Analysis/AutotuningDump.h b/llvm/include/llvm/Analysis/AutotuningDump.h -new file mode 100644 -index 000000000000..fb973f05323e ---- /dev/null -+++ b/llvm/include/llvm/Analysis/AutotuningDump.h -@@ -0,0 +1,75 @@ -+#if defined(ENABLE_AUTOTUNER) -+// ===-- AutotuningDump.h - Auto-Tuning-----------------------------------===// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// ===--------------------------------------------------------------------===// -+// -+// This file contains pass collecting IR of tuned regions and storing them into -+// predetrmined locations, to be used later by autotuning ML guidance -+// -+// ===--------------------------------------------------------------------===// -+ -+#include "llvm/Analysis/LoopInfo.h" -+#include "llvm/Analysis/LoopPass.h" -+#include "llvm/IR/PassManager.h" -+#include "llvm/Transforms/Scalar/LoopPassManager.h" -+#include -+ -+namespace llvm { -+class AutotuningDump { -+public: -+ AutotuningDump(bool IncrementalCompilation = false); -+ bool run(Module &F, function_ref GetLI); -+ -+private: -+ std::string AutoTuneDirPath; -+ std::unique_ptr createFile(const Twine &File); -+ int getConfigNumber(); -+ void dumpToStream(llvm::raw_ostream &os, const Loop &L) const; -+ void dumpToStream(llvm::raw_ostream &os, const Function &F) const; -+ void dumpFunctions(llvm::Module &M); -+ void dumpLoops(llvm::Module &M, function_ref GetLI); -+ void dumpModule(llvm::Module &M); -+ std::string getDirectoryName(const std::string File) const; -+ std::string getFileName(std::string FilePath); -+ -+ bool IsIncrementalCompilation; -+}; -+ -+class AutotuningDumpLegacy : public ModulePass { -+public: -+ static char ID; -+ AutotuningDumpLegacy(bool IncrementalCompilation = false); -+ StringRef getPassName() const override; -+ bool runOnModule(Module &M) override; -+ void getAnalysisUsage(AnalysisUsage &AU) const override; -+ -+private: -+ bool IsIncrementalCompilation; -+}; -+ -+class AutotuningDumpAnalysis -+ : public AnalysisInfoMixin { -+ friend AnalysisInfoMixin; -+ static AnalysisKey Key; -+ -+public: -+ AutotuningDumpAnalysis(bool IncrementalCompilation = false) { -+ IsIncrementalCompilation = IncrementalCompilation; -+ } -+ -+ // This pass only prints IRs of selected function or loops without doing any -+ // real analyses, thus the return value is meaningless. To avoid leaking data -+ // or memory, we typedef Result to Optional to avoid having to return an -+ // AutotuningDump object. -+ using Result = std::optional; -+ Result run(Module &M, ModuleAnalysisManager &AM); -+ -+private: -+ bool IsIncrementalCompilation; -+}; -+} // namespace llvm -+#endif -\ No newline at end of file -diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h -index 3434630c27cf..9be3e056cf76 100644 ---- a/llvm/include/llvm/Analysis/LoopInfo.h -+++ b/llvm/include/llvm/Analysis/LoopInfo.h -@@ -26,6 +26,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - namespace llvm { - -@@ -44,7 +47,12 @@ extern template class LoopBase; - - /// Represents a single loop in the control flow graph. Note that not all SCCs - /// in the CFG are necessarily loops. -+#if defined(ENABLE_AUTOTUNER) -+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase, -+ public autotuning::Container { -+#else - class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { -+#endif - public: - /// A range representing the start and end location of a loop. - class LocRange { -@@ -395,6 +403,11 @@ public: - return ""; - } - -+#if defined(ENABLE_AUTOTUNER) -+ void initCodeRegion() override; -+ uint64_t computeStructuralHash() override; -+#endif -+ - private: - Loop() = default; - -diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h -index ac1bc3549910..65f566cc75de 100644 ---- a/llvm/include/llvm/Analysis/Passes.h -+++ b/llvm/include/llvm/Analysis/Passes.h -@@ -58,6 +58,16 @@ namespace llvm { - // in a function and builds the region hierarchy. - // - FunctionPass *createRegionInfoPass(); -+ -+#if defined(ENABLE_AUTOTUNER) -+ //===--------------------------------------------------------------------===// -+ // -+ // createAutotuningDumpPass - This pass collects IR of tuned regions -+ // and stores them into predetrmined locations. -+ // for the purpose of autotuning ML guidance -+ // -+ ModulePass *createAutotuningDumpPass(); -+#endif - } - - #endif -diff --git a/llvm/include/llvm/AutoTuner/AutoTuning.h b/llvm/include/llvm/AutoTuner/AutoTuning.h -new file mode 100644 -index 000000000000..0f1f276306ec ---- /dev/null -+++ b/llvm/include/llvm/AutoTuner/AutoTuning.h -@@ -0,0 +1,486 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===-- AutoTuning.h - Auto-Tuning-----------------------------------------===// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines Auto Tuning related functions, models and interfaces. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef LLVM_AUTOTUNER_AUTOTUNING_H_ -+#define LLVM_AUTOTUNER_AUTOTUNING_H_ -+ -+#include "llvm/ADT/DenseMapInfo.h" -+#include "llvm/ADT/Hashing.h" -+#include "llvm/ADT/SetVector.h" -+#include "llvm/ADT/SmallVector.h" -+#include "llvm/IR/DebugInfoMetadata.h" -+#include "llvm/IR/DebugLoc.h" -+#include "llvm/Support/Casting.h" -+#include -+#include -+#include -+#include -+#include -+ -+// Options for AutoTuner incremental compilation. -+enum AutoTuningCompileOpt { -+ Inactive, // Disabled incremental compilation. -+ CoarseGrain, // For tuning LLVMParam. -+ FineGrain, // For tuning default code regions (Loop, CallSite, Function). -+ Basic // Same as CoarseGrain but can be applied for any code region. -+ // Can be used with ImpactRanker. -+}; -+ -+namespace autotuning { -+// Constant defintion for AutoTuner incremental compilation. -+const std::string CompileOptionStart = "start"; -+const std::string CompileOptionEnd = "end"; -+const std::string CompileOptionUnknow = "unknown"; -+const std::string CompileOptionUnroll = "loop-unroll"; -+const std::string CompileOptionVectorize = "loop-vectorize"; -+const std::string CompileOptionInline = "inline"; -+ -+class ParameterBase { -+public: -+ virtual ~ParameterBase() = default; -+ enum ParameterKind { -+ PK_PARAMETER, -+ }; -+ ParameterKind getKind() const { return Kind; } -+ -+ explicit ParameterBase(ParameterKind K) : Kind(K) {} -+ -+private: -+ const ParameterKind Kind; -+}; -+ -+template class Parameter : public ParameterBase { -+public: -+ Parameter(const T &RHS) : ParameterBase(PK_PARAMETER), Value(RHS) {} -+ const T &getValue() const { return Value; } -+ void setValue(const T &RHS) { Value = RHS; } -+ -+ static bool classof(const ParameterBase *P) { -+ return P->getKind() == PK_PARAMETER; -+ } -+ -+private: -+ T Value; -+}; -+ -+/// This class manages parameters of one codeRegion. -+class ParameterManager { -+ -+public: -+ // add a param into this ParameterManager -+ template -+ void add(const std::string &ParamName, const T ParamValue) { -+ std::shared_ptr Param = -+ std::make_shared>(ParamValue); -+ this->Parameters[ParamName] = Param; -+ } -+ -+ // Look up the value of a parameter by name in this ParameterManager. -+ // The found value will be assigned to the reference variable "Value". -+ // Return true if the parameter exits in this ParameterManager, -+ // and false otherwise. -+ template -+ bool findByName(const std::string &ParamName, T &Value) const { -+ auto Iterator = Parameters.find(ParamName); -+ if (Iterator == Parameters.end()) { -+ return false; -+ } -+ -+ auto ParamPtr = llvm::dyn_cast>(Iterator->second.get()); -+ if (ParamPtr != nullptr) { -+ Value = ParamPtr->getValue(); -+ return true; -+ } else { -+ return false; -+ } -+ } -+ -+private: -+ std::unordered_map> Parameters; -+}; -+ -+/// The debug location used to track a CodeRegion back to the source file. -+struct SourceLocation { -+ /// The source file corresponding to this CodeRegion. -+ std::string SourceFilePath; -+ unsigned SourceLine = 0; -+ unsigned SourceColumn = 0; -+ -+ bool operator==(const SourceLocation &CR) const { -+ return (this->SourceFilePath == CR.SourceFilePath) && -+ (this->SourceLine == CR.SourceLine) && -+ (this->SourceColumn == CR.SourceColumn); -+ }; -+ -+ explicit operator bool() const { -+ return !(SourceFilePath.empty() && SourceLine == 0 && SourceColumn == 0); -+ } -+}; -+ -+enum CodeRegionType { -+ CallSite, // Code region for function inlining. -+ Function, // Used in AutoTuningDump pass for IR writing. -+ LLVMParam, // Compilation flags. Tuned individually for each module. -+ Loop, // Code region for loops. -+ MachineBasicBlock, // Instruction scheduling code region. -+ Other, // Pass ordering code region. -+ ProgramParam, // Compilation flags. Tuned collectively for program. -+ Switch, // Tuning MinJumpTableEntries parameter for switch inst. -+ Empty, // Empty CodeRegion. -+ Invalid // Invalid CodeRegion. -+}; -+ -+enum HotnessType { -+ Unknown, -+ Cold, -+ Hot, -+}; -+ -+/// DynamicOptions represent a map: Arg -> DynamicConfigs. -+/// Where Arg is a tuning parameter on the associated CodeRegion. -+/// And DynamicConfigs is the possible tuning values associated with Arg. -+typedef std::map> DynamicOptions; -+ -+/// This class represents a region in source code including -+/// its name, function name, type, debug location, and associated pass name. -+class CodeRegion { -+ -+public: -+ // Default constructor -+ CodeRegion(const CodeRegionType Type = CodeRegionType::Other); -+ ~CodeRegion() = default; -+ // Concrete constructors -+ CodeRegion(const std::string &Name, const std::string &FuncName, -+ const CodeRegionType &Type, const llvm::DebugLoc &DL, -+ const DynamicOptions DO = {}); -+ CodeRegion(const std::string &Name, const std::string &FuncName, -+ const CodeRegionType &Type, -+ const SourceLocation &Location = SourceLocation(), -+ const DynamicOptions DO = {}); -+ CodeRegion(const std::string &Name, const std::string &FuncName, -+ const std::string &PassName, const CodeRegionType &Type, -+ const SourceLocation &Location = SourceLocation(), -+ const unsigned int Invocation = 0); -+ -+ bool operator==(const CodeRegion &CR) const; -+ inline bool operator!=(const CodeRegion &CR) const { return !(*this == CR); }; -+ -+ explicit operator bool() const { -+ return !(Name.empty() && FuncName.empty() && PassName.empty()); -+ } -+ -+ static std::string getTypeAsString(CodeRegionType CRType); -+ static std::string getHotnessAsString(HotnessType Hotness); -+ const std::string &getName() const { return Name; } -+ const std::string &getFuncName() const { return FuncName; } -+ const CodeRegionType &getType() const { return Type; } -+ const std::string &getFileName() const { return Location.SourceFilePath; } -+ const std::string &getTypeAsString() const { return StringType; } -+ const SourceLocation &getSourceLoc() const { return Location; } -+ const std::string &getPassName() const { return PassName; } -+ unsigned getSize() const { return Size; }; -+ void setPassName(const std::string &NewPassName); -+ void setSize(unsigned Size) { this->Size = Size; }; -+ void setHotness(HotnessType NewHotness) const { this->Hotness = NewHotness; } -+ HotnessType getHotness() const { return this->Hotness; } -+ std::string getHotnessAsString() const { return getHotnessAsString(Hotness); } -+ bool isCold() const { return this->Hotness == Cold; } -+ bool isHot() const { return this->Hotness == Hot; } -+ std::uint64_t getHash() const { return this->Hash; } -+ void setHash(std::uint64_t Hash) { this->Hash = Hash; } -+ DynamicOptions getAutoTunerOptions() const { return this->AutoTunerOptions; } -+ void setInvocation(unsigned int Invocation) { this->Invocation = Invocation; } -+ unsigned int getInvocation() const { return this->Invocation; } -+ -+ /// Add dynamic config options with Code Region for AutoTuner to tune instead -+ /// of using static config options. -+ void addAutoTunerOptions(const std::string ParamName, -+ std::vector Options) const { -+ this->AutoTunerOptions.insert( -+ std::pair>(ParamName, Options)); -+ } -+ static CodeRegion getInvalidInstance(); -+ static CodeRegion getEmptyInstance(); -+ void setBaselineConfig(std::map Value) const { -+ this->BaselineConfig = Value; -+ }; -+ std::map getBaselineConfig() const { -+ return this->BaselineConfig; -+ } -+ -+private: -+ /// Name of the code region. -+ /// For most of cases it's set to the name of a header basic block. -+ std::string Name; -+ /// Function name of this code region if any. -+ std::string FuncName; -+ /// Name of the pass which this code region is associated. -+ std::string PassName; -+ /// Type of this code region. Options are other, function, loop, -+ /// and machine basic block. -+ CodeRegionType Type; -+ /// Source Location. -+ SourceLocation Location; -+ std::string StringType; -+ /// Structural hash for the CodeRegion. -+ std::uint64_t Hash = 0; -+ /// Configs values passed to AutoTuner for dynamic setting of search space -+ /// for code regions. -+ mutable DynamicOptions AutoTunerOptions; -+ /// Configuration values passed to AutoTuner for generating the same binary -+ /// as the baseline. -+ mutable std::map BaselineConfig; -+ -+ /// Record the order of invocation of an optimization pass during the whole -+ /// compilation pipeline. It is used to differentiate multiple invocations of -+ /// a same optimization pass. -+ /// Currently, Loop Unroll pass is invoked twice during the compilation -+ /// pipeline. 'Invocation' helps to relate a code region with the invocation -+ /// of Loop Unroll pass where the code region is generated. -+ mutable unsigned int Invocation; -+ -+ /// Size of this code region. Usually it refers to the number of instructions -+ /// but could be different based on implementations. -+ unsigned Size = 0; -+ mutable HotnessType Hotness = Unknown; -+ -+ /// A boolean flag to record if a CR is initialized or not. -+ /// It should only be set to true by initContainer(). -+ /// We only add initialized CR to TuningOpps. -+ bool Initialized = false; -+ -+ friend class AutoTuningEngine; -+}; -+ -+/// This class is an interface for classes representing code regions in LLVM -+/// (eg. Loop, Function and MachineBasicBlock) to inherit -+/// so that auto-tuning can be enabled on them. -+/// A Container must contain a CodeRegion. -+class Container { -+ -+public: -+ Container() {} -+ virtual ~Container(){}; -+ -+ /// Abstract method for derived classes to overwrite -+ virtual void initCodeRegion() = 0; -+ virtual uint64_t computeStructuralHash() = 0; -+ -+ /// Get the Container's CodeRegion. -+ const CodeRegion &getCodeRegion() const; -+ /// Set the Container's CodeRegion. -+ void setCodeRegion(const CodeRegion &NewCR); -+ /// This method is to look up the value of a parameter that corresponds to an -+ /// Container. The parameter being looked up is stored in a ParameterManager. -+ template -+ bool lookUpParams(const std::string &ParamsName, T &Value) const; -+ -+ /// Check if the code region is being tuned by config file. -+ bool requiresIRDump(bool IsFunctionIR = false) const; -+ -+private: -+ CodeRegion CR; -+ friend class AutoTuningEngine; -+}; -+} // end namespace autotuning -+ -+namespace std { -+template <> -+// Implement hash for CodeRegion data type in std namespace. Only using common -+// attributes (with and without using 'OmitAutotuningMetadata' flag) of -+// CodeRegion. Remaining attributes are compared in overloaded == function. -+struct hash { -+ std::size_t operator()(const autotuning::CodeRegion &CR) const { -+ return llvm::hash_combine(CR.getPassName(), CR.getType()); -+ } -+}; -+} // namespace std -+ -+namespace llvm { -+// Forward Decleration. -+class CallBase; -+ -+typedef autotuning::CodeRegion CodeRegion; -+template <> struct DenseMapInfo { -+ static bool isEqual(const CodeRegion &LHS, const CodeRegion &RHS) { -+ return LHS == RHS; -+ } -+ static inline CodeRegion getEmptyKey() { -+ return autotuning::CodeRegion::getEmptyInstance(); -+ } -+ static inline CodeRegion getTombstoneKey() { -+ return autotuning::CodeRegion::getInvalidInstance(); -+ } -+ // Implement hash for CodeRegion data type in llvm namespace. Only using -+ // common attributes (with and without using 'OmitAutotuningMetadata' flag) -+ // of CodeRegion. Remaining attributes are compared in overloaded == -+ // function. -+ static unsigned getHashValue(const CodeRegion &CR) { -+ return llvm::hash_combine(CR.getPassName(), CR.getType()); -+ } -+}; -+} // namespace llvm -+ -+namespace autotuning { -+using namespace llvm; -+typedef std::unordered_map LookUpTable; -+typedef llvm::SetVector CodeRegions; -+ -+/// Structure to store information of CallSite code regions which is used to -+/// get a different SourceLocation for multiple callsites (same callee) in a -+/// function when these callsites have same SourceLocation due to inlining. -+struct CallSiteLocation { -+ llvm::CallBase *CB; -+ llvm::Function *Caller; -+ llvm::Function *Callee; -+ SourceLocation SrcLoc; -+}; -+ -+class AutoTuningEngine { -+public: -+ AutoTuningEngine() { Enabled = false; } -+ ~AutoTuningEngine() {} -+ -+ /// Initialize the Container for auto-tuning. -+ void initContainer(Container *Container, const std::string &PassName, -+ const StringRef FuncName = "", bool AddOpportunity = true, -+ unsigned int Invocation = 0); -+ -+ /// Initialize auto-tuning. This method should only be called in the main -+ /// function. -+ /// \return Error::success() on success or the related Error otherwise. -+ llvm::Error init(const std::string &ModuleID); -+ -+ /// Finalize auto-tuning. This method should only be called in the main -+ /// function. -+ /// \return Error::success() on success or the related Error otherwise. -+ llvm::Error finalize(); -+ -+ /// Return the number of tuning configuration used for this compilation. -+ llvm::Expected getConfigNumber(); -+ -+ void enable() { Enabled = true; } -+ void disable() { Enabled = false; } -+ bool isEnabled() const { return Enabled; } -+ bool isMLEnabled() const { return MLEnabled; } -+ bool isDumpEnabled() const { return DumpEnabled; } -+ bool isGenerateOutput() const { return GenerateOutput; } -+ bool isParseInput() const { return ParseInput; } -+ bool isTuningAllowedForType(CodeRegionType CRType) const { -+ return (CodeRegionFilterTypes.count(CRType) > 0); -+ } -+ bool isThinLTOTuning() const; -+ -+ /// Convert a pass-name to CodeRegionType. -+ CodeRegionType convertPassToType(std::string Pass); -+ -+ /// First sets BaselineConfig value for the CR then -+ /// add a tuning opportunity into the TuningOpps list. -+ void addOpportunity(const CodeRegion &OppCR, -+ std::map BaselineConfig = {}); -+ bool hasOpportunities() const { return TuningOpps.empty(); } -+ -+ bool shouldRunOptPass(std::string FileName, std::string Pass); -+ -+ /// Insert all of the callsites of a function in CallSiteLocs vector. -+ void insertCallSiteLoc(CallSiteLocation Loc); -+ -+ /// Update CallSiteLocs vector with new callsites (if any) which get available -+ /// due to inlining. -+ void updateCallSiteLocs(llvm::CallBase *CB, llvm::CallBase *Ptr, -+ llvm::Function *F, unsigned int Line); -+ -+ /// Clean up the CallSiteLocs vector by keeping the callsite if there are -+ /// multiple calls to same callee. This cleaning will be perform before -+ /// inlining any callsite. -+ void cleanCallSiteLoc(); -+ -+ /// clear the CallSiteLocs vector. -+ void clearCallSiteLocs(); -+ -+ /// Return the SourceLocation::SourceLine (if available). -+ std::optional getCallSiteLoc(llvm::CallBase *CB); -+ -+ template -+ bool lookUpGlobalParams(const std::string &ParamsName, T &Value) const; -+ /// A map storing llvm parameters. -+ std::unordered_map LLVMParams; -+ /// A map storing program parameters. -+ std::unordered_map ProgramParams; -+ -+private: -+ std::string ModuleID; -+ /// This boolean indicates if the auto-tuning mode is enabled. -+ /// It will be set to true if the any of the following command line options -+ /// (auto-tuning-input, auto-tuning-result and auto-tuning-opp) is specified. -+ bool Enabled; -+ /// This boolean indicates if the ML guidance feature is enabled in -+ /// Autotuner. It will be set to true if -fautotune-rank is specified. -+ bool MLEnabled; -+ /// This boolean indicates if the IR dumping is enabled or not. IR dumping -+ /// is enabled for ML guidance feature. It can also be enabled with command -+ /// line compiler flag 'enable-autotuning-dump'. -+ bool DumpEnabled = false; -+ /// This boolean indicates if compiler is parsing/using 'config.yaml' file -+ /// generated by AutoTuner and use the configuration values instead of -+ /// determining with compiler heuristic. -+ bool ParseInput; -+ /// This boolean indicates if compiler is creating/generating opportunity -+ /// file(s) which will be consumed by AutoTuner to create the search space. -+ bool GenerateOutput; -+ /// A map of filename and set of optimization passes; an optimization pass -+ /// will be added to this set if a CodeRegion belongs to the optimization -+ /// pass. -+ std::unordered_map> OppPassList; -+ -+ /// Vector to store all of the duplicate calls in a function and the calls -+ /// which get available due to inlining. -+ SmallVector CallSiteLocs; -+ -+ /// A set to store the code region types that will be tuned in current -+ /// autotuning flow. This will be populated with code region types based on -+ /// 'auto-tuning-type-filter' for -fautotune-generate and the types will be -+ /// extracted from config.yaml in case of -fautotune. -+ /// This set is used to apply type-based filtering prior to creating/ -+ /// initializing a code region. -+ std::unordered_set CodeRegionFilterTypes; -+ -+ // A statically initialized map used to convert 'pass-name' to -+ // 'CodeRegionType'. -+ std::unordered_map PTTMap; -+ -+ /// A map of CodeRegion and ParameterManager to keep track of all the -+ /// parameters of code regions loaded from input config file. -+ LookUpTable ParamTable; -+ /// A list of CodeRegions as tuning opportunities -+ CodeRegions TuningOpps; -+ /// A ParameterManager for global parameters. -+ ParameterManager GlobalParams; -+ -+ /// Apply filters for CodeRegions. -+ void applyOppFilters(CodeRegions &CRs); -+ -+ /// Apply function name filter for CodeRegions. -+ bool applyFunctionFilter(std::string FuncName); -+ -+ friend class Container; -+ friend class CodeRegion; -+ friend class AutoTuningRemarkManager; -+}; -+ -+extern class AutoTuningEngine Engine; // AutoTuning Engine -+ -+} // end namespace autotuning -+ -+#endif /* LLVM_AUTOTUNER_AUTOTUNING_H_ */ -+#endif -diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h -new file mode 100644 -index 000000000000..153a2c6246ad ---- /dev/null -+++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h -@@ -0,0 +1,43 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===- llvm/AutoTuner/AutoTuningRemarkManager.h - Remark Manager ----------===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file declares the main interface for inputting and outputting -+// remarks for AutoTuning. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef LLVM_AUTOTUNINGREMARKMANAGER_H -+#define LLVM_AUTOTUNINGREMARKMANAGER_H -+ -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/Remarks/RemarkStreamer.h" -+#include "llvm/Support/Error.h" -+#include -+#include -+#include -+ -+namespace autotuning { -+class AutoTuningRemarkManager { -+public: -+ /// Read a list of parameters from input file. -+ /// Return true on success and false on failure. -+ static llvm::Error read(autotuning::AutoTuningEngine &E, -+ const std::string &InputName, -+ const std::string &RemarksFormat); -+ -+ /// Dump a list of CodeRegions as tuning opportunities into a file. -+ /// Return true on success and false on failure. -+ static llvm::Error dump(const autotuning::AutoTuningEngine &E, -+ const std::string &DirPath, -+ const std::string &RemarksFormat, -+ const std::string &RemarksPasses); -+}; -+} // namespace autotuning -+#endif // LLVM_AUTOTUNINGREMARKMANAGER_H -+#endif -diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h -new file mode 100644 -index 000000000000..0096139b12e9 ---- /dev/null -+++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h -@@ -0,0 +1,47 @@ -+#if defined(ENABLE_AUTOTUNER) -+// ===------------ llvm/AutoTuner/AutoTuningRemarkStreamer.h --------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. -+// -+// ===---------------------------------------------------------------------===// -+// -+// This file contains the implementation of the conversion between AutoTuner -+// CodeRegions and serializable remarks::Remark objects. -+// -+// ===---------------------------------------------------------------------===// -+ -+#ifndef LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H -+#define LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H -+ -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/Remarks/Remark.h" -+#include "llvm/Remarks/RemarkStreamer.h" -+#include "llvm/Support/Error.h" -+#include "llvm/Support/ToolOutputFile.h" -+#include -+#include -+ -+namespace llvm { -+/// Streamer for AutoTuner remarks which has logic for dealing with CodeRegions. -+class AutoTuningRemarkStreamer { -+ remarks::RemarkStreamer &RS; -+ /// Convert CodeRegion into remark objects. -+ remarks::Remark toRemark(const autotuning::CodeRegion &CR); -+ -+public: -+ AutoTuningRemarkStreamer(remarks::RemarkStreamer &RS) : RS(RS) {} -+ /// Emit a CodeRegion through the streamer. -+ void emit(const autotuning::CodeRegion &CR); -+ /// Set a pass filter based on a regex \p Filter. -+ /// Returns an error if the regex is invalid. -+ Error setFilter(StringRef Filter); -+}; -+} // end namespace llvm -+ -+#endif // LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H -+#endif -diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h -index 52388692c196..95ac9acf4e5e 100644 ---- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h -+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h -@@ -27,6 +27,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - namespace llvm { - -@@ -91,9 +94,19 @@ public: - void deleteNode(MachineInstr *MI); - }; - -+#if defined(ENABLE_AUTOTUNER) -+class MachineBasicBlock -+ : public ilist_node_with_parent, -+ public autotuning::Container { -+#else - class MachineBasicBlock - : public ilist_node_with_parent { -+#endif - public: -+#if defined(ENABLE_AUTOTUNER) -+ void initCodeRegion() override; -+ uint64_t computeStructuralHash() override; -+#endif - /// Pair of physical register and lane mask. - /// This is not simply a std::pair typedef because the members should be named - /// clearly as they both have an integer type. -diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h -index 93cf0d27e9a7..c0db48ae1789 100644 ---- a/llvm/include/llvm/IR/Function.h -+++ b/llvm/include/llvm/IR/Function.h -@@ -37,6 +37,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - namespace llvm { - -@@ -56,6 +59,24 @@ class User; - class BranchProbabilityInfo; - class BlockFrequencyInfo; - -+#if defined(ENABLE_AUTOTUNER) -+class AutoTuningEnabledFunction : public autotuning::Container { -+public: -+ AutoTuningEnabledFunction() = delete; -+ void initCodeRegion() override; -+ void setHot() { this->Hotness = autotuning::Hot; } -+ void setCold() { this->Hotness = autotuning::Cold; } -+ autotuning::HotnessType getHotness() const { return this->Hotness; } -+ uint64_t computeStructuralHash() override; -+ -+private: -+ AutoTuningEnabledFunction(Function *F) { Func = F; }; -+ Function *Func; -+ autotuning::HotnessType Hotness = autotuning::Unknown; -+ friend class Function; -+}; -+#endif -+ - class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, - public ilist_node { - public: -@@ -68,6 +89,13 @@ public: - using arg_iterator = Argument *; - using const_arg_iterator = const Argument *; - -+#if defined(ENABLE_AUTOTUNER) -+ // There is one-to-one correspondence between ATEFunction and the current -+ // Function object to avoid messing up the LLVM User and owned Use classes' -+ // memory layout. -+ AutoTuningEnabledFunction ATEFunction = AutoTuningEnabledFunction(this); -+#endif -+ - private: - // Important things that make up a function! - BasicBlockListType BasicBlocks; ///< The basic blocks -@@ -128,6 +156,11 @@ public: - void operator=(const Function&) = delete; - ~Function(); - -+#if defined(ENABLE_AUTOTUNER) -+ // Return the auto-tuning enabled version of this Function object. -+ AutoTuningEnabledFunction &getATEFunction() { return ATEFunction; } -+#endif -+ - // This is here to help easily convert from FunctionT * (Function * or - // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling - // FunctionT->getFunction(). -@@ -840,7 +873,11 @@ public: - /// AssemblyAnnotationWriter. - void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr, - bool ShouldPreserveUseListOrder = false, -+#if defined(ENABLE_AUTOTUNER) -+ bool IsForDebug = false, bool PrintCompleteIR = false) const; -+#else - bool IsForDebug = false) const; -+#endif - - /// viewCFG - This function is meant for use from the debugger. You can just - /// say 'call F->viewCFG()' and a ghostview window should pop up from the -diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h -index 6095b0a1be69..dcc9bbee30fa 100644 ---- a/llvm/include/llvm/IR/InstrTypes.h -+++ b/llvm/include/llvm/IR/InstrTypes.h -@@ -1169,6 +1169,23 @@ public: - using OperandBundleDef = OperandBundleDefT; - using ConstOperandBundleDef = OperandBundleDefT; - -+#if defined(ENABLE_AUTOTUNER) -+//===----------------------------------------------------------------------===// -+// AutoTuningEnabledCallSite Class -+//===----------------------------------------------------------------------===// -+class CallBase; -+class AutoTuningEnabledCallSite : public autotuning::Container { -+public: -+ AutoTuningEnabledCallSite() = delete; -+ void initCodeRegion() override; -+ uint64_t computeStructuralHash() override; -+ AutoTuningEnabledCallSite(CallBase *CallBase) { CB = CallBase; } -+ -+private: -+ CallBase *CB; -+}; -+#endif -+ - //===----------------------------------------------------------------------===// - // CallBase Class - //===----------------------------------------------------------------------===// -@@ -1229,6 +1246,13 @@ protected: - unsigned getNumSubclassExtraOperandsDynamic() const; - - public: -+#if defined(ENABLE_AUTOTUNER) -+ // There is one-to-one correspondence between ATECallSite and CallBase class -+ // to enable auto-tuning. -+ std::unique_ptr ATECallSite = -+ std::make_unique(this); -+#endif -+ - using Instruction::getContext; - - /// Create a clone of \p CB with a different set of operand bundles and -diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h -index 8d60384e1a32..9d638af6eeef 100644 ---- a/llvm/include/llvm/IR/Instructions.h -+++ b/llvm/include/llvm/IR/Instructions.h -@@ -3287,6 +3287,23 @@ struct OperandTraits : public VariadicOperandTraits { - - DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) - -+#if defined(ENABLE_AUTOTUNER) -+//===----------------------------------------------------------------------===// -+// AutoTuningEnabledSwitchInst Class -+//===----------------------------------------------------------------------===// -+class SwitchInst; -+ -+class AutoTuningEnabledSwitchInst : public autotuning::Container { -+public: -+ AutoTuningEnabledSwitchInst() = delete; -+ void initCodeRegion() override; -+ uint64_t computeStructuralHash() override; -+ AutoTuningEnabledSwitchInst(SwitchInst *SwitchInst) { SI = SwitchInst; } -+ -+private: -+ SwitchInst *SI; -+}; -+#endif - //===----------------------------------------------------------------------===// - // SwitchInst Class - //===----------------------------------------------------------------------===// -@@ -3332,6 +3349,13 @@ protected: - public: - void operator delete(void *Ptr) { User::operator delete(Ptr); } - -+#if defined(ENABLE_AUTOTUNER) -+ // There is one-to-one correspondence between ATESwitchInst and -+ // SwitchInst class to enable AutoTuner. -+ std::unique_ptr ATESwitchInst = -+ std::make_unique(this); -+#endif -+ - // -2 - static const unsigned DefaultPseudoIndex = static_cast(~0L-1); - -diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h -index 670a40b28eab..904a450a1888 100644 ---- a/llvm/include/llvm/IR/Module.h -+++ b/llvm/include/llvm/IR/Module.h -@@ -38,6 +38,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - namespace llvm { - -diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h -index 1bdeb85afa3c..c0bcc8153eb8 100644 ---- a/llvm/include/llvm/IR/StructuralHash.h -+++ b/llvm/include/llvm/IR/StructuralHash.h -@@ -15,6 +15,9 @@ - #define LLVM_IR_STRUCTURALHASH_H - - #include -+#if defined(ENABLE_AUTOTUNER) -+#include -+#endif - - namespace llvm { - -@@ -24,6 +27,17 @@ class Module; - uint64_t StructuralHash(const Function &F); - uint64_t StructuralHash(const Module &M); - -+#if defined(ENABLE_AUTOTUNER) -+class MachineBasicBlock; -+class BasicBlock; -+class CallBase; -+class SwitchInst; -+ -+uint64_t StructuralHash(const std::vector BBs); -+uint64_t StructuralHash(const MachineBasicBlock &MBB); -+uint64_t StructuralHash(const CallBase &CB); -+uint64_t StructuralHash(const SwitchInst &SI); -+#endif - } // end namespace llvm - - #endif -diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h -index c6fee47b464b..80bec2d82e24 100644 ---- a/llvm/include/llvm/InitializePasses.h -+++ b/llvm/include/llvm/InitializePasses.h -@@ -340,6 +340,11 @@ void initializeWasmEHPreparePass(PassRegistry&); - void initializeWinEHPreparePass(PassRegistry&); - void initializeWriteBitcodePassPass(PassRegistry&); - void initializeXRayInstrumentationPass(PassRegistry&); -+#if defined(ENABLE_AUTOTUNER) -+void initializeAutotuningDumpLegacyPass(PassRegistry &); -+void initializeAutoTuningCompileFunctionLegacyPass(PassRegistry &); -+void initializeAutoTuningCompileModuleLegacyPass(PassRegistry &); -+#endif - - } // end namespace llvm - -diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h -index 7420ea64e954..3a8ecb1399f1 100644 ---- a/llvm/include/llvm/LinkAllPasses.h -+++ b/llvm/include/llvm/LinkAllPasses.h -@@ -54,6 +54,9 @@ - #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" - #include "llvm/Transforms/Vectorize.h" - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/Transforms/Scalar/AutoTuningCompile.h" -+#endif - - namespace { - struct ForcePassLinking { -@@ -93,6 +96,11 @@ namespace { - (void) llvm::createInstSimplifyLegacyPass(); - (void) llvm::createInstructionCombiningPass(); - (void) llvm::createJMCInstrumenterPass(); -+#if defined(ENABLE_AUTOTUNER) -+ (void) llvm::createAutotuningDumpPass(); -+ (void) llvm::createAutoTuningCompileFunctionLegacyPass(); -+ (void) llvm::createAutoTuningCompileModuleLegacyPass(); -+#endif - (void) llvm::createKCFIPass(); - (void) llvm::createLCSSAPass(); - (void) llvm::createLICMPass(); -diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h -index a66f7ed73f2f..3bcc0c710498 100644 ---- a/llvm/include/llvm/Remarks/Remark.h -+++ b/llvm/include/llvm/Remarks/Remark.h -@@ -20,6 +20,10 @@ - #include "llvm/Support/raw_ostream.h" - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include -+#include -+#endif - - namespace llvm { - namespace remarks { -@@ -47,6 +51,9 @@ struct Argument { - StringRef Key; - // FIXME: We might want to be able to store other types than strings here. - StringRef Val; -+#if defined(ENABLE_AUTOTUNER) -+ std::optional> VectorVal; -+#endif - // If set, the debug location corresponding to the value. - std::optional Loc; - -@@ -65,6 +72,9 @@ enum class Type { - Analysis, - AnalysisFPCommute, - AnalysisAliasing, -+#if defined(ENABLE_AUTOTUNER) -+ AutoTuning, -+#endif - Failure, - First = Unknown, - Last = Failure -@@ -105,6 +115,28 @@ struct Remark { - /// Mangled name of the function that triggers the emssion of this remark. - StringRef FunctionName; - -+#if defined(ENABLE_AUTOTUNER) -+ /// Type of the code region that the remark is associated with. -+ std::optional CodeRegionType; -+ -+ /// Configuration value for generating the same baseline binary associated -+ /// with this remark. -+ std::optional> BaselineConfig; -+ -+ /// Hash of the code region that the remark is associated with. -+ std::optional CodeRegionHash; -+ -+ /// Configs values passed to AutoTuner for dynamic setting of search space -+ /// for code regions. -+ std::optional>> -+ AutoTunerOptions; -+ -+ /// Invocation/Registering of Optimization Pass in the compilation pipeline. -+ /// It is used to differentiate between different invocations of same -+ /// optimization pass. -+ std::optional Invocation; -+#endif -+ - /// The location in the source file of the remark. - std::optional Loc; - -diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h -index d2079fead668..c59dba2749f0 100644 ---- a/llvm/include/llvm/Support/CommandLine.h -+++ b/llvm/include/llvm/Support/CommandLine.h -@@ -40,6 +40,9 @@ - #include - #include - -+#if defined(ENABLE_AUTOTUNER) -+#include -+#endif - namespace llvm { - - namespace vfs { -@@ -72,6 +75,20 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, - const char *EnvVar = nullptr, - bool LongOptionsUseDoubleDash = false); - -+#if defined(ENABLE_AUTOTUNER) -+// It will parse AutoTuner options (LLVMParams & ProgramParams) and add them as -+// command line flags for the compilation process. These options are suggested -+// by AutoTuner during tuning flow. This function will always be called after -+// AutoTuner initialization. -+// Returns true on success. Otherwise, this will print the error message to -+// stderr and exit. -+bool ParseAutoTunerOptions( -+ std::unordered_map LLVMParams, -+ std::unordered_map ProgramParams, -+ StringRef Overview = "", raw_ostream *Errs = nullptr, -+ const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); -+#endif -+ - // Function pointer type for printing version information. - using VersionPrinterTy = std::function; - -diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h -index aaba710cfde6..e69beeade947 100644 ---- a/llvm/include/llvm/Transforms/Scalar.h -+++ b/llvm/include/llvm/Transforms/Scalar.h -@@ -16,6 +16,10 @@ - - #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/Pass.h" -+#include -+#endif - - namespace llvm { - -@@ -299,6 +303,19 @@ Pass *createLoopSimplifyCFGPass(); - // - FunctionPass *createInstSimplifyLegacyPass(); - -+#if defined(ENABLE_AUTOTUNER) -+//===--------------------------------------------------------------------===// -+// -+// createAutotuningCompilePass - It writes IR files with -fautotune-generate -+// for autotuning flow. It also enables/disables the execution of optimization -+// passes in subsequent compilations (with -fautotune) based on autotuning -+// methodology and available opportunities. -+// -+FunctionPass * -+createAutoTuningCompileFunctionLegacyPass(std::string Pass = "unknown"); -+ModulePass * -+createAutoTuningCompileModuleLegacyPass(std::string Pass = "unknown"); -+#endif - - //===----------------------------------------------------------------------===// - // -diff --git a/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h -new file mode 100644 -index 000000000000..2cbb48f336ef ---- /dev/null -+++ b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h -@@ -0,0 +1,170 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===---------------- AutoTuningCompile.h - Auto-Tuning -------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// This file declares the interface for AutoTuning Incremental Compilation. -+/// Incremental compilation requires two passes 1) Module Pass and 2) Function -+/// Pass for legacy pass manager. It requires an additional Loop Pass for new -+/// pass manager. -+/// AutoTuningOptPassGate class is also defined here which is used to enable/ -+/// disable the execution of optimization passes for the compilation pipeline. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ -+#define LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ -+ -+#include "llvm/Analysis/LoopAnalysisManager.h" -+#include "llvm/Analysis/LoopInfo.h" -+#include "llvm/Analysis/LoopPass.h" -+#include "llvm/IR/OptBisect.h" -+#include "llvm/IR/PassManager.h" -+#include "llvm/Pass.h" -+#include "llvm/Transforms/Scalar/LoopPassManager.h" -+ -+namespace llvm { -+ -+class Pass; -+ -+// Skips or runs optimization passes. -+class AutoTuningOptPassGate : public OptPassGate { -+public: -+ explicit AutoTuningOptPassGate(bool Skip = false) : Skip(Skip) {} -+ -+ bool shouldRunPass(const StringRef PassName, -+ StringRef IRDescription) override; -+ bool isEnabled() const override { return true; } -+ bool checkPass(const StringRef PassName, const StringRef TargetDesc); -+ void setSkip(bool Skip) { this->Skip = Skip; } -+ bool getSkip() const { return Skip; } -+ -+private: -+ bool Skip; -+}; -+ -+// Returns a static AutoTuningOptPassGate object which will be used to register -+// CallBack for OptBisect instrumentation. -+// It will also be used by AutoTuningCompile passes to enable/disable -+// optimization passes. -+AutoTuningOptPassGate &getAutoTuningOptPassGate(); -+ -+class AutoTuningCompileModule { -+public: -+ explicit AutoTuningCompileModule(std::string Pass = "unknown"); -+ bool run(Module &M); -+ // Write IR files for each module to be re-used in subsequent compilations -+ // for autotuning cycles. It only works with -fautotune-generate. -+ void writeIRFiles(Module &M) const; -+ // Enable/Disable execution of optimization passes in subsequent compilations -+ // based on autotuning methodology and available opportunities. It Only works -+ // with -fautotune -+ bool modifyCompilationPipeline(Module &M) const; -+ -+ static void setSkipCompilation(bool Option) { SkipCompilation = Option; } -+ static bool getSkipCompilation() { return SkipCompilation; } -+ -+private: -+ static bool SkipCompilation; -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileModuleLegacy : public ModulePass { -+public: -+ static char ID; -+ explicit AutoTuningCompileModuleLegacy(std::string Pass = "unknown"); -+ bool runOnModule(Module &M) override; -+ StringRef getPassName() const override; -+ void getAnalysisUsage(AnalysisUsage &AU) const override { -+ AU.setPreservesAll(); -+ } -+ -+private: -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileModulePass -+ : public PassInfoMixin { -+public: -+ explicit AutoTuningCompileModulePass(std::string Pass = "unknown") -+ : Pass(Pass){}; -+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &); -+ -+private: -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileFunction { -+public: -+ explicit AutoTuningCompileFunction(std::string Pass = "unknown"); -+ bool run(Function &F); -+ // Write IR files for each module to be re-used in subsequent compilations -+ // for autotuning cycles. It only works with -fautotune-generate. -+ void writeIRFiles(Module &M); -+ // Enable/Disable execution of optimization passes in subsequent compilations -+ // based on autotuning methodology and available opportunities. It Only works -+ // with -fautotune -+ bool modifyCompilationPipeline(Function &F); -+ -+private: -+ // A module may have multiple functions; decision to enable/disable -+ // execution of an optimization pass will be made for the first function and -+ // will be used for all of the functions in the module. -+ // 'SkipDecision' will be set once the decision is made for a specific 'Pass'. -+ bool SkipDecision = false; -+ -+ // A module may have multiple functions; IR file will be written once for the -+ // entire module for a specific 'Pass'. -+ bool IsModuleWritten = false; -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileFunctionLegacy : public FunctionPass { -+public: -+ static char ID; -+ explicit AutoTuningCompileFunctionLegacy(std::string Pass = "unknown"); -+ bool runOnFunction(Function &F) override; -+ StringRef getPassName() const override; -+ void getAnalysisUsage(AnalysisUsage &AU) const override { -+ AU.setPreservesAll(); -+ } -+ -+private: -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileFunctionPass -+ : public PassInfoMixin { -+public: -+ explicit AutoTuningCompileFunctionPass(std::string Pass = "unknown") -+ : Pass(Pass){}; -+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -+ -+private: -+ std::string Pass = ""; -+}; -+ -+class AutoTuningCompileLoopPass -+ : public PassInfoMixin { -+public: -+ explicit AutoTuningCompileLoopPass(std::string Pass = "unknown") -+ : Pass(Pass){}; -+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, -+ LoopStandardAnalysisResults &AR, LPMUpdater &U); -+ -+private: -+ std::string Pass = ""; -+}; -+ -+} // end namespace llvm -+ -+#endif /* LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ */ -+#endif -diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h -index 4f3010965b59..e1cccf417898 100644 ---- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h -+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h -@@ -108,7 +108,11 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, - unsigned TripMultiple, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, - TargetTransformInfo::PeelingPreferences &PP, -+#if defined(ENABLE_AUTOTUNER) -+ bool &UseUpperBound, unsigned int Invocation = 0); -+#else - bool &UseUpperBound); -+#endif - - void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, -diff --git a/llvm/lib/Analysis/AutotuningDump.cpp b/llvm/lib/Analysis/AutotuningDump.cpp -new file mode 100644 -index 000000000000..81b2bbead70e ---- /dev/null -+++ b/llvm/lib/Analysis/AutotuningDump.cpp -@@ -0,0 +1,265 @@ -+#if defined(ENABLE_AUTOTUNER) -+// ===-- AutotuningDump.cpp - Auto-Tuning---------------------------------===// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// ===--------------------------------------------------------------------===// -+// -+// This file contains pass collecting IR of tuned regions and storing them into -+// predetrmined locations, to be used later by autotuning ML guidance -+// -+// ===--------------------------------------------------------------------===// -+#include "llvm/Analysis/AutotuningDump.h" -+#include "llvm/Analysis/Passes.h" -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/IR/LegacyPassManager.h" -+#include "llvm/InitializePasses.h" -+#include "llvm/Pass.h" -+#include "llvm/Support/CommandLine.h" -+#include "llvm/Support/Path.h" -+#include "llvm/Support/Process.h" -+#include "llvm/Support/raw_ostream.h" -+#include -+ -+using namespace llvm; -+ -+#define DEBUG_TYPE "autotuning-dump" -+ -+enum AutotuningDumpOpt { whole_modules, functions, loops }; -+ -+// Enable Debug Options to be specified on the command line -+cl::opt AutotuningDumpMode( -+ "autotuning-dump-mode", cl::desc("Choose autotuning dump mode:"), -+ cl::init(whole_modules), -+ cl::values(clEnumVal(whole_modules, "dump each module in its own file"), -+ clEnumVal(functions, "dump each function in its own file"), -+ clEnumVal(loops, "dump each loop in its own file"))); -+ -+AutotuningDump::AutotuningDump(bool IncrementalCompilation) { -+ // Check if the environment variable AUTOTUNE_DATADIR is set. -+ IsIncrementalCompilation = IncrementalCompilation; -+ AutoTuneDirPath = "autotune_datadir"; -+ if (std::optional MaybePath = -+ llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR")) -+ AutoTuneDirPath = *MaybePath; -+} -+ -+int AutotuningDump::getConfigNumber() { -+ auto ConfigNumOrErr = autotuning::Engine.getConfigNumber(); -+ if (ConfigNumOrErr) -+ return *ConfigNumOrErr; -+ else { -+ report_fatal_error("Invalid/missing Autotuner configuration ID"); -+ return -1; -+ } -+} -+ -+void AutotuningDump::dumpToStream(llvm::raw_ostream &os, const Loop &L) const { -+ L.print(os); -+} -+ -+void AutotuningDump::dumpToStream(llvm::raw_ostream &os, -+ const Function &F) const { -+ F.print(os, /*AAW*/ nullptr, /*ShouldPreserveUseListOrder*/ false, -+ /*IsForDebug*/ false, /*PrintCompleteIR*/ true); -+} -+ -+// Create appropriate file. File will contains AbsolutePath/FileName. -+std::unique_ptr AutotuningDump::createFile(const Twine &File) { -+ std::error_code EC; -+ return std::make_unique((File).str(), EC, -+ sys::fs::CD_CreateAlways, -+ sys::fs::FA_Write, sys::fs::OF_None); -+} -+ -+std::string AutotuningDump::getDirectoryName(const std::string File) const { -+ std::string DirectoryName = AutoTuneDirPath; -+ if (!autotuning::Engine.isMLEnabled()) -+ DirectoryName += "/IR_files"; -+ -+ DirectoryName = DirectoryName + "/" + File + "/"; -+ -+ // Create directory if not already present. -+ if (std::error_code EC = sys::fs::create_directories(DirectoryName)) -+ errs() << "could not create directory: " << DirectoryName << ": " -+ << EC.message(); -+ -+ return DirectoryName; -+} -+ -+std::string AutotuningDump::getFileName(std::string FilePath) { -+ if (autotuning::Engine.isMLEnabled()) -+ return std::to_string(this->getConfigNumber()) + ".ll"; -+ std::replace(FilePath.begin(), FilePath.end(), '/', '_'); -+ return FilePath + ".ll"; -+} -+ -+void AutotuningDump::dumpModule(Module &M) { -+ std::unique_ptr fptr; -+ LLVM_DEBUG(dbgs() << "AutotuningDump: Dump module IR files.\n"); -+ if (IsIncrementalCompilation) { -+ std::string Filename = M.getSourceFileName(); -+ llvm::SmallString<128> FilenameVec = StringRef(Filename); -+ llvm::sys::fs::make_absolute(FilenameVec); -+ size_t Pos = FilenameVec.rfind("."); -+ if (Pos != std::string::npos) { -+ FilenameVec.pop_back_n(FilenameVec.size() - Pos); -+ FilenameVec.append(".ll"); -+ } -+ fptr = createFile(FilenameVec); -+ } else { -+ std::string File = llvm::sys::path::filename(M.getName()).str(); -+ std::string DirectoryName = getDirectoryName(File); -+ std::string FileName = getFileName(M.getName().str()); -+ fptr = createFile(DirectoryName + FileName); -+ } -+ -+ M.print(*fptr, nullptr, true, false); -+} -+ -+void AutotuningDump::dumpFunctions(Module &M) { -+ std::string FilePath = M.getName().str(); -+ std::replace(FilePath.begin(), FilePath.end(), '/', '_'); -+ std::string DirectoryName = getDirectoryName(FilePath); -+ for (Function &F : M.getFunctionList()) { // go through all functions -+ if (F.isDeclaration() || F.empty()) -+ continue; -+ -+ AutoTuningEnabledFunction *AutotuneFunc = &F.getATEFunction(); -+ assert(AutotuneFunc); -+ autotuning::Engine.initContainer(AutotuneFunc, "autotuning-dump", -+ F.getName(), false); -+ std::string FuncName = F.getName().str(); -+ // check the whole function -+ if (AutotuneFunc->requiresIRDump(true)) { -+ auto fptr = createFile(DirectoryName + Twine(FuncName) + ".ll"); -+ this->dumpToStream(*fptr, F); -+ } -+ } -+} -+ -+void AutotuningDump::dumpLoops(Module &M, -+ function_ref GetLI) { -+ for (Function &F : M) { -+ // Nothing to do for declarations. -+ if (F.isDeclaration() || F.empty()) -+ continue; -+ -+ LoopInfo &LI = GetLI(F); -+ for (auto &L : LI.getLoopsInPreorder()) { -+ Function *Func = nullptr; -+ StringRef FuncName = ""; -+ if (!L->isInvalid()) -+ Func = L->getHeader()->getParent(); -+ if (Func) -+ FuncName = Func->getName(); -+ -+ autotuning::Engine.initContainer(L, "autotuning-dump", FuncName, false); -+ if (L->requiresIRDump()) { -+ std::string FuncName = L->getCodeRegion().getFuncName(); -+ unsigned SourceLine = L->getCodeRegion().getSourceLoc().SourceLine; -+ std::string DirectoryName = AutoTuneDirPath + "/" + -+ llvm::sys::path::filename(FuncName).str() + -+ "_loop_" + std::to_string(SourceLine); -+ std::string FileName = std::to_string(this->getConfigNumber()) + ".ll"; -+ auto fptr = createFile(DirectoryName + "/" + FileName); -+ this->dumpToStream(*fptr, *L); -+ } -+ } -+ } -+} -+ -+bool AutotuningDump::run(Module &M, -+ function_ref GetLI) { -+ // Change to absolute path. -+ SmallString<256> OutputPath = StringRef(AutoTuneDirPath); -+ sys::fs::make_absolute(OutputPath); -+ -+ // Creating new output directory, if it does not exists. -+ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { -+ llvm::errs() << (make_error( -+ "could not create directory: " + Twine(OutputPath) + ": " + -+ EC.message(), -+ EC)); -+ return false; -+ } -+ -+ if (IsIncrementalCompilation) { -+ LLVM_DEBUG( -+ dbgs() -+ << "AutotuningDump: IR files writing for incremental compilation.\n"); -+ dumpModule(M); -+ return false; -+ } -+ -+ switch (AutotuningDumpMode) { -+ case whole_modules: -+ dumpModule(M); -+ break; -+ case functions: -+ dumpFunctions(M); -+ break; -+ case loops: -+ dumpLoops(M, GetLI); -+ } -+ -+ return false; -+} -+ -+AutotuningDumpLegacy::AutotuningDumpLegacy(bool IncrementalCompilation) -+ : ModulePass(AutotuningDumpLegacy::ID) { -+ IsIncrementalCompilation = IncrementalCompilation; -+ initializeAutotuningDumpLegacyPass(*PassRegistry::getPassRegistry()); -+} -+ -+bool AutotuningDumpLegacy::runOnModule(Module &M) { -+ if (!autotuning::Engine.isDumpEnabled()) -+ return false; -+ -+ auto GetLI = [this](Function &F) -> LoopInfo & { -+ return getAnalysis(F).getLoopInfo(); -+ }; -+ -+ AutotuningDump Impl(IsIncrementalCompilation); -+ return Impl.run(M, GetLI); -+} -+ -+StringRef AutotuningDumpLegacy::getPassName() const { -+ return "Autotuning Dump"; -+} -+ -+void AutotuningDumpLegacy::getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.setPreservesAll(); -+ AU.addRequired(); -+} -+ -+char AutotuningDumpLegacy::ID = 0; -+INITIALIZE_PASS_BEGIN(AutotuningDumpLegacy, "autotuning-dump", -+ "Dump IR for Autotuned Code Regions", false, false) -+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -+INITIALIZE_PASS_END(AutotuningDumpLegacy, "autotuning-dump", -+ "Dump IR for Autotuned Code Regions", false, false) -+ -+ModulePass *llvm::createAutotuningDumpPass() { -+ return new AutotuningDumpLegacy(); -+} -+ -+AnalysisKey AutotuningDumpAnalysis::Key; -+ -+AutotuningDumpAnalysis::Result -+AutotuningDumpAnalysis::run(Module &M, ModuleAnalysisManager &AM) { -+ if (!autotuning::Engine.isDumpEnabled()) -+ return false; -+ -+ auto &FAM = AM.getResult(M).getManager(); -+ auto GetLI = [&FAM](Function &F) -> LoopInfo & { -+ return FAM.getResult(F); -+ }; -+ -+ AutotuningDump Impl(IsIncrementalCompilation); -+ Impl.run(M, GetLI); -+ return false; -+} -+#endif -diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt -index 4a1797c42789..9c6a70f0221f 100644 ---- a/llvm/lib/Analysis/CMakeLists.txt -+++ b/llvm/lib/Analysis/CMakeLists.txt -@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMAnalysis - Analysis.cpp - AssumeBundleQueries.cpp - AssumptionCache.cpp -+ AutotuningDump.cpp - BasicAliasAnalysis.cpp - BlockFrequencyInfo.cpp - BlockFrequencyInfoImpl.cpp -@@ -153,6 +154,7 @@ add_llvm_component_library(LLVMAnalysis - ${MLLinkDeps} - - LINK_COMPONENTS -+ AutoTuner - BinaryFormat - Core - Object -diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp -index e2480d51d372..f6b3c14a0345 100644 ---- a/llvm/lib/Analysis/InlineAdvisor.cpp -+++ b/llvm/lib/Analysis/InlineAdvisor.cpp -@@ -383,15 +383,27 @@ llvm::shouldInline(CallBase &CB, - Function *Callee = CB.getCalledFunction(); - Function *Caller = CB.getCaller(); - -+#if defined(ENABLE_AUTOTUNER) -+ // Get the code Region to add BaselineConfig values for inline -+ const autotuning::CodeRegion &CR = CB.ATECallSite.get()->getCodeRegion(); -+ static const std::string ForceInlineParamStr = "ForceInline"; -+#endif -+ - if (IC.isAlways()) { - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); -+#if defined(ENABLE_AUTOTUNER) -+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); -+#endif - return IC; - } - - if (!IC) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); -+#if defined(ENABLE_AUTOTUNER) -+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); -+#endif - if (IC.isNever()) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) -@@ -417,6 +429,9 @@ llvm::shouldInline(CallBase &CB, - LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB - << " Cost = " << IC.getCost() - << ", outer Cost = " << TotalSecondaryCost << '\n'); -+#if defined(ENABLE_AUTOTUNER) -+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); -+#endif - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", - Call) -@@ -430,6 +445,9 @@ llvm::shouldInline(CallBase &CB, - - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB - << '\n'); -+#if defined(ENABLE_AUTOTUNER) -+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); -+#endif - return IC; - } - -diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp -index a2f46edcf5ef..9f8f57865de2 100644 ---- a/llvm/lib/Analysis/InlineCost.cpp -+++ b/llvm/lib/Analysis/InlineCost.cpp -@@ -162,6 +162,14 @@ static cl::opt DisableGEPConstOperand( - "disable-gep-const-evaluation", cl::Hidden, cl::init(false), - cl::desc("Disables evaluation of GetElementPtr with constant operands")); - -+#if defined(ENABLE_AUTOTUNER) -+static cl::opt -+ EnableLocalCallSiteTuning("auto-tuning-enable-local-callsite-tuning", -+ cl::init(false), cl::Hidden, -+ cl::desc("Enable AutoTuning for local callsites " -+ "as well.")); -+#endif -+ - namespace llvm { - std::optional getStringFnAttrAsInt(const Attribute &Attr) { - if (Attr.isValid()) { -@@ -2990,6 +2998,27 @@ InlineCost llvm::getInlineCost( - return llvm::InlineCost::getNever(UserDecision->getFailureReason()); - } - -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled() && Call.getCaller() && -+ (!Callee->hasLocalLinkage() || EnableLocalCallSiteTuning)) { -+ bool ForceInline = false; -+ bool Found = false; -+ -+ autotuning::Engine.initContainer(Call.ATECallSite.get(), "inline", -+ Call.getCaller()->getName(), -+ /* addOpportunity */ false); -+ -+ Found = Call.ATECallSite->lookUpParams("ForceInline", ForceInline); -+ -+ if (Found) { -+ if (ForceInline) -+ return llvm::InlineCost::getAlways("Force inlined by auto-tuning"); -+ else -+ return llvm::InlineCost::getNever("Force non-inlined by auto-tuning"); -+ } -+ } -+#endif -+ - LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "... (caller:" << Call.getCaller()->getName() - << ")\n"); -diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp -index 60a72079e864..36aca73ee675 100644 ---- a/llvm/lib/Analysis/LoopInfo.cpp -+++ b/llvm/lib/Analysis/LoopInfo.cpp -@@ -37,6 +37,10 @@ - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/GenericLoopInfoImpl.h" - #include "llvm/Support/raw_ostream.h" -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/IR/StructuralHash.h" -+#endif - using namespace llvm; - - // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. -@@ -663,6 +667,54 @@ Loop::LocRange Loop::getLocRange() const { - return LocRange(); - } - -+#if defined(ENABLE_AUTOTUNER) -+uint64_t Loop::computeStructuralHash() { -+ std::vector BBs = getBlocks(); -+ return StructuralHash(BBs); -+} -+ -+void Loop::initCodeRegion() { -+ std::string LoopName; -+ // use the header's name as the loop name -+ if (BasicBlock *Header = getHeader()) { -+ if (Header->hasName()) { -+ LoopName = Header->getName().str(); -+ } -+ // if the header doesn't have a name, -+ // use the label of this header from AsmWriter -+ else { -+ std::string Str; -+ llvm::raw_string_ostream RSO(Str); -+ Header->printAsOperand(RSO); -+ LoopName = RSO.str(); -+ } -+ } else { -+ LoopName = ""; -+ } -+ -+ Function *F = this->getHeader()->getParent(); -+ StringRef FuncName = F->getName(); -+ -+ // init the CodeRegion -+ autotuning::CodeRegion CR = autotuning::CodeRegion( -+ LoopName, FuncName.data(), autotuning::CodeRegionType::Loop, -+ this->getStartLoc()); -+ // Compute the number of non-debug IR instructions in this loop. -+ unsigned TotalNumInstrs = 0; -+ for (const BasicBlock *BB : this->getBlocks()) { -+ unsigned NumInstrs = std::distance(BB->instructionsWithoutDebug().begin(), -+ BB->instructionsWithoutDebug().end()); -+ TotalNumInstrs += NumInstrs; -+ } -+ CR.setSize(TotalNumInstrs); -+ // Compute hotness. -+ autotuning::HotnessType Hotness = F->ATEFunction.getHotness(); -+ CR.setHotness(Hotness); -+ -+ this->setCodeRegion(CR); -+} -+#endif -+ - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } - -diff --git a/llvm/lib/AutoTuner/AutoTuning.cpp b/llvm/lib/AutoTuner/AutoTuning.cpp -new file mode 100644 -index 000000000000..1f09f06d84a2 ---- /dev/null -+++ b/llvm/lib/AutoTuner/AutoTuning.cpp -@@ -0,0 +1,705 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===-- AutoTuning.cpp - Auto-Tuning --------------------------------------===// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines Auto Tuning related functions, models and interfaces. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/ADT/STLExtras.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/AutoTuner/AutoTuningRemarkManager.h" -+#include "llvm/Support/CommandLine.h" -+#include "llvm/Support/Error.h" -+#include "llvm/Support/Process.h" -+ -+// Enable debug messages for AutoTuning. -+#define DEBUG_TYPE "autotuning" -+ -+using namespace llvm; -+ -+// defined in 'lib/Remarks/YAMLRemarkParser.cpp'. -+extern cl::opt OmitAutotuningMetadata; -+ -+// -auto-tuning-input - Command line option to specify the input file. -+static cl::opt InputFile("auto-tuning-input", cl::Hidden, -+ cl::desc("Specify the input file")); -+ -+// -auto-tuning-opp - Command line option to specify the output directory of -+// tuning opportunities. -+static cl::opt OutputOppDir( -+ "auto-tuning-opp", cl::Hidden, -+ cl::desc("Specify the output directory of tuning opportunities")); -+ -+static cl::opt -+ RemarksPasses("auto-tuning-pass-filter", cl::Hidden, -+ cl::desc("Only dump auto-tuning remarks from passes whose " -+ "names match the given regular expression"), -+ cl::value_desc("regex")); -+ -+static cl::opt -+ ProjectDir("autotuning-project-dir", cl::Hidden, cl::init(""), -+ cl::desc("Specify project base dir to make code region name " -+ "relative to base dir. This operation will only be " -+ "applied for coarse-grain code regions.")); -+ -+// -auto-tuning-config-id - Command line option to specify the config number -+// being used for compilation. Required only for ML guidance feature. -+static cl::opt CFGNumber( -+ "auto-tuning-config-id", cl::Hidden, -+ cl::desc( -+ "Specify the auto-tuning configuration ID used in this compilation.")); -+ -+static cl::opt OutputFormat( -+ "auto-tuning-remark-format", cl::Hidden, -+ cl::desc("The format used for auto-tuning remarks (default: YAML)"), -+ cl::value_desc("format"), cl::init("yaml")); -+ -+// AutoTuner incremental compilation options. -+cl::opt AutoTuningCompileMode( -+ "auto-tuning-compile-mode", cl::Hidden, cl::init(Inactive), -+ cl::desc("AutoTuner: Choose incremental compilation mode."), -+ cl::values(clEnumVal(Inactive, -+ "AutoTuner: Disable incremental compilation."), -+ clEnumVal(CoarseGrain, "AutoTuner: Enable incremental " -+ "compilation for coarse grain tuning."), -+ clEnumVal(FineGrain, "AutoTuner: Enable incremental compilation " -+ "for fine grain tuning."), -+ clEnumVal(Basic, "AutoTuner: Enable incremental compilation for " -+ "any kind of code region."))); -+ -+static cl::opt -+ EnableAutoTuningDump("enable-autotuning-dump", cl::Hidden, cl::init(false), -+ cl::desc("Enable AutoTuningDump Pass")); -+ -+static cl::opt -+ ThinLTOTuning("autotuning-thin-lto", cl::Hidden, cl::init(false), -+ cl::desc("AutoTuner enabled in ThinLTO mode.")); -+ -+namespace autotuning { -+ -+static cl::list AutotuningOutputFilter( -+ "auto-tuning-type-filter", cl::Hidden, cl::CommaSeparated, -+ cl::desc( -+ "Select types of code regions to dump auto-tuning opportunities for:"), -+ cl::values(clEnumVal(LLVMParam, "LLVMParam code regions only"), -+ clEnumVal(ProgramParam, "ProgramParam code regions only"), -+ clEnumVal(CallSite, "CallSite code regions only"), -+ clEnumVal(Function, "Function code regions only"), -+ clEnumVal(Loop, "Loop code regions only"), -+ clEnumVal(MachineBasicBlock, -+ "Machine basic block code regions only"), -+ clEnumVal(Switch, "Switch code regions only"), -+ clEnumVal(Other, "All other types of code regions"))); -+ -+static cl::list AutotuningFunctionFilter( -+ "auto-tuning-function-filter", cl::Hidden, cl::CommaSeparated, -+ cl::desc("Apply code region filtering based on function names")); -+ -+static const cl::opt ExcludeColdCodeRegion( -+ "auto-tuning-exclude-cold", cl::Hidden, cl::init(true), -+ cl::desc("Use profile data to prune cold code regions from auto-tuning")); -+ -+static const cl::opt CodeRegionMatchingWithHash( -+ "auto-tuning-code-region-matching-hash", cl::Hidden, cl::init(true), -+ cl::desc("Use IR hashing to match the Code Regions")); -+ -+static const cl::opt HotCodeRegionOnly( -+ "auto-tuning-hot-only", cl::Hidden, cl::init(false), -+ cl::desc( -+ "Use profile data to include hot code regions only from auto-tuning")); -+ -+static const cl::opt -+ SizeThreshold("auto-tuning-size-threshold", cl::Hidden, cl::init(0), -+ cl::desc("Prune small code regions from auto-tuning with a " -+ "size smaller than the threshold")); -+ -+static inline const std::string generateName(const std::string &Name) { -+ if (Name.empty()) -+ return "unnamed"; -+ else -+ return Name; -+} -+ -+//===----------------------------------------------------------------------===// -+// CodeRegion implementation -+CodeRegion::CodeRegion(const CodeRegionType Type) : Type(Type) {} -+ -+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, -+ const CodeRegionType &Type, const DebugLoc &DL, -+ const DynamicOptions DO) { -+ this->Name = generateName(Name); -+ this->FuncName = generateName(FuncName); -+ this->Type = Type; -+ this->StringType = getTypeAsString(Type); -+ if (DL) { -+ StringRef File = DL->getFilename(); -+ unsigned Line = DL->getLine(); -+ unsigned Col = DL->getColumn(); -+ this->Location = SourceLocation{File.str(), Line, Col}; -+ } -+ this->AutoTunerOptions = DO; -+} -+ -+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, -+ const CodeRegionType &Type, -+ const SourceLocation &Location, -+ const DynamicOptions DO) { -+ this->Name = generateName(Name); -+ this->FuncName = generateName(FuncName); -+ this->Type = Type; -+ this->StringType = getTypeAsString(Type); -+ this->Location = Location; -+ this->AutoTunerOptions = DO; -+} -+ -+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName, -+ const std::string &PassName, const CodeRegionType &Type, -+ const SourceLocation &Location, -+ const unsigned int Invocation) -+ : CodeRegion(Name, FuncName, Type, Location) { -+ this->PassName = generateName(PassName); -+ this->Invocation = Invocation; -+} -+ -+bool CodeRegion::operator==(const CodeRegion &CodeRegion) const { -+ bool IsEqual = false; -+ if (OmitAutotuningMetadata) -+ IsEqual = (this->getHash() == CodeRegion.getHash()) && -+ (this->Type == CodeRegion.getType()) && -+ (this->PassName == CodeRegion.getPassName()); -+ else { -+ IsEqual = (this->Type == CodeRegion.getType()) && -+ (this->Name == CodeRegion.getName()) && -+ (this->PassName == CodeRegion.getPassName()) && -+ (this->FuncName == CodeRegion.getFuncName()) && -+ (this->Location == CodeRegion.getSourceLoc()); -+ if (CodeRegionMatchingWithHash) -+ IsEqual = IsEqual && (this->getHash() == CodeRegion.getHash()); -+ } -+ -+ if (autotuning::Engine.ParseInput) -+ IsEqual = IsEqual && this->getInvocation() == CodeRegion.getInvocation(); -+ -+ if (autotuning::Engine.GenerateOutput) -+ IsEqual = -+ IsEqual && this->getBaselineConfig() == CodeRegion.getBaselineConfig(); -+ -+ return IsEqual; -+} -+ -+std::string CodeRegion::getTypeAsString(CodeRegionType CRType) { -+ switch (CRType) { -+ case autotuning::CodeRegionType::MachineBasicBlock: -+ return "machine_basic_block"; -+ case autotuning::CodeRegionType::Loop: -+ return "loop"; -+ case autotuning::CodeRegionType::Function: -+ return "function"; -+ case autotuning::CodeRegionType::CallSite: -+ return "callsite"; -+ case autotuning::CodeRegionType::LLVMParam: -+ return "llvm-param"; -+ case autotuning::CodeRegionType::ProgramParam: -+ return "program-param"; -+ case autotuning::CodeRegionType::Switch: -+ return "switch"; -+ default: -+ return "other"; -+ } -+} -+ -+std::string CodeRegion::getHotnessAsString(HotnessType Hotness) { -+ switch (Hotness) { -+ case autotuning::HotnessType::Cold: -+ return "cold"; -+ case autotuning::HotnessType::Hot: -+ return "hot"; -+ default: -+ return "unknown"; -+ } -+} -+ -+void CodeRegion::setPassName(const std::string &NewPassName) { -+ this->PassName = generateName(NewPassName); -+} -+ -+/* static */ -+autotuning::CodeRegion CodeRegion::getInvalidInstance() { -+ static autotuning::CodeRegion Invalid = -+ CodeRegion(autotuning::CodeRegionType::Invalid); -+ return Invalid; -+} -+ -+/* static */ -+autotuning::CodeRegion CodeRegion::getEmptyInstance() { -+ static autotuning::CodeRegion Empty = -+ CodeRegion(autotuning::CodeRegionType::Empty); -+ return Empty; -+} -+ -+//===----------------------------------------------------------------------===// -+// Container implementation -+// -+ -+const CodeRegion &Container::getCodeRegion() const { return CR; } -+ -+void Container::setCodeRegion(const CodeRegion &NewCR) { this->CR = NewCR; } -+ -+template -+bool Container::lookUpParams(const std::string &ParamsName, T &Value) const { -+ bool Found = false; -+ auto ConfigMapIterator = Engine.ParamTable.find(CR); -+ if (ConfigMapIterator != Engine.ParamTable.end()) { -+ ParameterManager InputParams = ConfigMapIterator->second; -+ Found = InputParams.findByName(ParamsName, Value); -+ if (Found) { -+ LLVM_DEBUG(dbgs() << ParamsName << " is set for the CodeRegion: \n" -+ << " Name: " << CR.getName() << "\n" -+ << " FuncName: " << CR.getFuncName() << "\n" -+ << " PassName: " << CR.getPassName() << "\n" -+ << " Type: " << CR.getTypeAsString() << "\n" -+ << " Hash: " << CR.getHash() << "\n" -+ << "\n"); -+ } -+ } -+ return Found; -+} -+ -+bool Container::requiresIRDump(bool IsFunctionIR) const { -+ auto findBaselineRegion = [&]() -> bool { -+ for (auto &entry : Engine.TuningOpps) -+ if (!IsFunctionIR) { -+ if (CR.getSourceLoc() == entry.getSourceLoc()) -+ return true; -+ } else { -+ if (CR.getFileName() == entry.getFileName() && -+ CR.getFuncName() == entry.getFuncName()) -+ return true; -+ } -+ return false; -+ }; -+ auto findNonBaselineRegion = [&]() { -+ for (auto &entry : Engine.ParamTable) -+ if (!IsFunctionIR) { -+ if (CR.getSourceLoc() == entry.first.getSourceLoc()) -+ return true; -+ } else { -+ if (CR.getFileName() == entry.first.getFileName() && -+ CR.getFuncName() == entry.first.getFuncName()) -+ return true; -+ } -+ return false; -+ }; -+ -+ if (CFGNumber == -1) -+ return findBaselineRegion(); -+ else -+ return findNonBaselineRegion(); -+} -+ -+template bool Container::lookUpParams(const std::string &ParamsName, -+ int &Value) const; -+template bool Container::lookUpParams(const std::string &ParamsName, -+ bool &Value) const; -+template bool -+Container::lookUpParams(const std::string &ParamsName, -+ std::string &Value) const; -+template bool Container::lookUpParams>( -+ const std::string &ParamsName, std::vector &Value) const; -+ -+static unsigned int count(SmallVector CallSiteLocs, -+ CallSiteLocation Loc) { -+ unsigned int Count = 0; -+ for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { -+ if (Loc.Caller == CallSiteLocs[Idx].Caller && -+ Loc.Callee == CallSiteLocs[Idx].Callee) -+ Count++; -+ } -+ return Count; -+} -+ -+bool AutoTuningEngine::isThinLTOTuning() const { return ThinLTOTuning; } -+ -+CodeRegionType AutoTuningEngine::convertPassToType(std::string PassName) { -+ auto Search = PTTMap.find(PassName); -+ if (Search == PTTMap.end()) -+ llvm_unreachable( -+ "AutoTuningEngine: Invalid/unsupported optimization pass provided.\n"); -+ return Search->second; -+} -+ -+void AutoTuningEngine::insertCallSiteLoc(CallSiteLocation Loc) { -+ CallSiteLocs.emplace_back(Loc); -+} -+ -+// If a function has multiple calls to same callee, then insert all the calls in -+// the CallSiteLocs vector which get available due to inlining of such calls. -+// It will use "Original Call Line No + New Call Line No" instead of using -+// "DebugLoc Line No". -+void AutoTuningEngine::updateCallSiteLocs(llvm::CallBase *OldCB, -+ llvm::CallBase *NewCB, -+ llvm::Function *Callee, -+ unsigned int Line) { -+ for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { -+ if (OldCB == CallSiteLocs[Idx].CB) { -+ CallSiteLocation Loc = CallSiteLocs[Idx]; -+ Loc.CB = NewCB; -+ Loc.Callee = Callee; -+ Loc.SrcLoc.SourceLine = Loc.SrcLoc.SourceLine + Line; -+ CallSiteLocs.emplace_back(Loc); -+ break; -+ } -+ } -+} -+ -+void AutoTuningEngine::cleanCallSiteLoc() { -+ unsigned int Size = CallSiteLocs.size(); -+ unsigned int Idx = 0; -+ for (unsigned int I = 0; I < Size; ++I) { -+ CallSiteLocation Loc = CallSiteLocs[Idx]; -+ unsigned int Count = count(CallSiteLocs, Loc); -+ if (Count == 1) { -+ CallSiteLocs.erase(CallSiteLocs.begin() + Idx); -+ continue; -+ } -+ Idx++; -+ } -+} -+ -+void AutoTuningEngine::clearCallSiteLocs() { CallSiteLocs.clear(); } -+ -+std::optional -+AutoTuningEngine::getCallSiteLoc(llvm::CallBase *CB) { -+ for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) { -+ if (CB == CallSiteLocs[Idx].CB) -+ return CallSiteLocs[Idx].SrcLoc.SourceLine; -+ } -+ return std::nullopt; -+} -+ -+void AutoTuningEngine::addOpportunity( -+ const CodeRegion &OppCR, -+ std::map BaselineConfig) { -+ if (!OppCR.Initialized) -+ return; -+ -+ OppCR.setBaselineConfig(BaselineConfig); -+ if (!TuningOpps.contains(OppCR)) -+ TuningOpps.insert(OppCR); -+ else if (OppCR.getHotness() != Unknown) { -+ // If OppCR already exists in TuningOpps with unknown hotness, -+ // then update it if the current hotness is hot/cold. -+ auto OppI = find(TuningOpps, OppCR); -+ if (OppI->getHotness() == Unknown) -+ OppI->setHotness(OppCR.getHotness()); -+ } -+} -+ -+void AutoTuningEngine::applyOppFilters(CodeRegions &CRs) { -+ CodeRegions NewCRs; -+ for (CodeRegion CR : CRs) { -+ if (AutotuningOutputFilter.getNumOccurrences() > 0) { -+ bool IsMatched = false; -+ for (auto CRType : AutotuningOutputFilter) { -+ if (CRType == CR.getType()) { -+ IsMatched = true; -+ break; -+ } -+ } -+ // Filter out the CodeRegion if its type fails to match any types -+ // specified from the command line. -+ if (!IsMatched) -+ continue; -+ } -+ if (SizeThreshold.getNumOccurrences() > 0 && CR.getSize() < SizeThreshold) -+ continue; -+ if (ExcludeColdCodeRegion && CR.isCold()) { -+ LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " -+ << CR.getFuncName() << "\n"); -+ continue; -+ } -+ if (HotCodeRegionOnly && !CR.isHot()) { -+ LLVM_DEBUG(dbgs() << "Skip CodeRegion with " << CR.getHotnessAsString() -+ << " function " << CR.getFuncName() << "\n"); -+ continue; -+ } -+ NewCRs.insert(CR); -+ LLVM_DEBUG(dbgs() << "CodeRegion added as an tuning opportunity: \n" -+ << " Name: " << CR.getName() << "\n" -+ << " FuncName: " << CR.getFuncName() << "\n" -+ << " PassName: " << CR.getPassName() << "\n" -+ << " Type: " << CR.getTypeAsString() << "\n" -+ << " Size: " << CR.getSize() << "\n" -+ << " Hotness: " << CR.getHotnessAsString() << "\n" -+ << " Hash: " << CR.getHash() << "\n" -+ << " Location: " << CR.getSourceLoc().SourceFilePath -+ << "; " << CR.getSourceLoc().SourceLine << "; " -+ << CR.getSourceLoc().SourceColumn << "\n\n"); -+ } -+ if (AutotuningOutputFilter.getNumOccurrences() == 0 || -+ std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), -+ Other) != AutotuningOutputFilter.end()) { -+ // Add an empty CodeRegion with ModuleID as an tuning opportunity. -+ // It could be used to represent a module level code region. -+ autotuning::CodeRegion GlobalCR = -+ CodeRegion(ModuleID, "none", "all", Other); -+ GlobalCR.setHash(llvm::hash_combine(ModuleID, Other)); -+ NewCRs.insert(GlobalCR); -+ LLVM_DEBUG(dbgs() << "Module added as an tuning opportunity: \n" -+ << " Name: " << GlobalCR.getName() << "\n" -+ << " Hash: " << GlobalCR.getHash() << "\n" -+ << "\n"); -+ } -+ -+ // Include LLVMParam as an tuning opportunity only if it is specified with -+ // -auto-tuning-type-filter. -+ if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), -+ LLVMParam) != AutotuningOutputFilter.end()) -+ NewCRs.insert(CodeRegion(ModuleID, "none", "none", LLVMParam)); -+ -+ if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(), -+ ProgramParam) != AutotuningOutputFilter.end()) -+ NewCRs.insert(CodeRegion(ModuleID, "none", "none", ProgramParam)); -+ -+ CRs = NewCRs; -+} -+ -+bool AutoTuningEngine::applyFunctionFilter(std::string FuncName) { -+ if (AutotuningFunctionFilter.getNumOccurrences() == 0) -+ return true; -+ -+ for (std::string FunctionFilter : AutotuningFunctionFilter) -+ if (FuncName == FunctionFilter) -+ return true; -+ -+ return false; -+} -+ -+void AutoTuningEngine::initContainer(Container *Container, -+ const std::string &PassName, -+ const StringRef FuncName, -+ bool AddOpportunity, -+ unsigned int Invocation) { -+ if (Enabled) { -+ if (!isTuningAllowedForType(convertPassToType(PassName)) && -+ !(isGenerateOutput() && -+ AutotuningOutputFilter.getNumOccurrences() == 0)) -+ return; -+ -+ if (!applyFunctionFilter(FuncName.str())) -+ return; -+ -+ // The attributes of a Container could potentially change overtime even with -+ // the same pass if the associated pass is invoked multiple times at -+ // different places in the pipeline. Therefore, we need to initCodeRegion -+ // every time when this function is called to ensure the CodeRegion with the -+ // latest information will be added as tuning opportunities. -+ Container->initCodeRegion(); -+ if (Container->CR.getType() == autotuning::CodeRegionType::Invalid) -+ return; -+ -+ uint64_t hash = Container->computeStructuralHash(); -+ CodeRegion &OppCR = Container->CR; -+ if (GenerateOutput) { -+ if (OppCR.getSize() < SizeThreshold) -+ return; -+ if (ExcludeColdCodeRegion && OppCR.isCold()) { -+ LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function " -+ << OppCR.getFuncName() << "\n"); -+ return; -+ } -+ if (HotCodeRegionOnly && !OppCR.isHot()) { -+ LLVM_DEBUG(dbgs() << "Skip CodeRegion with " -+ << OppCR.getHotnessAsString() << " function " -+ << OppCR.getFuncName() << "\n"); -+ return; -+ } -+ } -+ OppCR.setPassName(PassName); -+ OppCR.setHash(hash); -+ OppCR.setInvocation(Invocation); -+ OppCR.Initialized = true; -+ if (AddOpportunity) -+ addOpportunity(OppCR); -+ } -+} -+ -+bool AutoTuningEngine::shouldRunOptPass(std::string Filename, -+ std::string Pass) { -+ return OppPassList.count(Filename) ? OppPassList[Filename].count(Pass) -+ : false; -+} -+ -+Error AutoTuningEngine::init(const std::string &Module) { -+ ParseInput = false; -+ if (std::optional MaybePath = -+ llvm::sys::Process::GetEnv("AUTOTUNE_INPUT")) { -+ InputFile = *MaybePath; -+ ParseInput = true; -+ } else if (InputFile.getNumOccurrences() > 0) { -+ ParseInput = true; -+ } -+ -+ GenerateOutput = false; -+ if (OutputOppDir.getNumOccurrences() > 0) -+ GenerateOutput = true; -+ -+ // Invocation of any of the following command line options -+ // (auto-tuning-input and auto-tuning-opp) or env variable -+ // AUTOTUNE_ALL_INPUT can enable auto-tuning mode. -+ if (ParseInput || GenerateOutput) { -+ Enabled = true; -+ // Generate absolute path and remove the base directory (if available). -+ // A relative path will be used as (coarse-grain) code region name. -+ llvm::SmallString<128> ModuleVec = StringRef(Module); -+ llvm::sys::fs::make_absolute(ModuleVec); -+ if (ProjectDir.size() && ModuleVec.startswith(ProjectDir)) -+ ModuleID = ModuleVec.substr(ProjectDir.size()).str(); -+ else -+ ModuleID = std::string(ModuleVec); -+ } -+ -+ // Initialization of map to be used for pass-name to CodeRegionType -+ // conversion. -+ PTTMap = {{"loop-unroll", Loop}, -+ {"loop-vectorize", Loop}, -+ {"inline", CallSite}, -+ {"machine-scheduler", MachineBasicBlock}, -+ {"switch-lowering", Switch}, -+ {"autotuning-dump", Function}}; -+ -+ if (ParseInput) { -+ // Currently we only support yaml format for input. -+ if (Error E = AutoTuningRemarkManager::read(*this, InputFile, "yaml")) { -+ errs() << "Error parsing auto-tuning input.\n"; -+ return E; -+ } else { -+ LLVM_DEBUG(dbgs() << "AutoTuningEngine is initialized.\n" -+ << " Size of ParamTable: " << this->ParamTable.size() -+ << "\n"); -+ if (LLVMParams.size()) -+ LLVM_DEBUG(dbgs() << "AutoTuner: LLVMParams applied."); -+ if (ProgramParams.size()) -+ LLVM_DEBUG(dbgs() << "AutoTuner: ProgramParams applied.\n"); -+ } -+ } -+ -+ for (auto CRType : AutotuningOutputFilter) -+ CodeRegionFilterTypes.insert(CRType); -+ -+ if (GenerateOutput) { -+ switch (AutoTuningCompileMode) { -+ case CoarseGrain: { -+ bool Valid = false; -+ if (AutotuningOutputFilter.getNumOccurrences() > 0) { -+ Valid = true; -+ for (auto CRType : AutotuningOutputFilter) -+ if (CRType != LLVMParam) { -+ Valid = false; -+ break; -+ } -+ } -+ if (!Valid) { -+ AutoTuningCompileMode = Inactive; -+ errs() << "AutoTunerCompile: Code region type filtering does not match" -+ " with incremental compilation option.\n" -+ "Disabling incremental compilation.\n"; -+ } -+ break; -+ } -+ case FineGrain: { -+ bool Valid = false; -+ if (AutotuningOutputFilter.getNumOccurrences() > 0) { -+ Valid = true; -+ for (auto CRType : AutotuningOutputFilter) { -+ if (CRType != Loop && CRType != CallSite && CRType != Function) { -+ Valid = false; -+ break; -+ } -+ } -+ } -+ if (!Valid) { -+ AutoTuningCompileMode = Inactive; -+ errs() << "AutoTunerCompile: Code region type filtering does not match" -+ "with incremental compilation option.\n" -+ "Disabling incremental compilation.\n"; -+ } -+ break; -+ } -+ case Basic: -+ case Inactive: -+ break; -+ default: -+ llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " -+ "Compilation mode.\n"); -+ } -+ } -+ -+ MLEnabled = (CFGNumber.getNumOccurrences() > 0); -+ if (EnableAutoTuningDump || MLEnabled) -+ DumpEnabled = true; -+ return Error::success(); -+} -+ -+llvm::Expected AutoTuningEngine::getConfigNumber() { -+ if (!isMLEnabled()) { -+ std::string errorMsg = -+ "No Autotuner configuration specified; ML guidance is unavailable."; -+ return createStringError(inconvertibleErrorCode(), errorMsg); -+ } else -+ return CFGNumber; -+} -+ -+Error AutoTuningEngine::finalize() { -+ if (OutputOppDir.getNumOccurrences() > 0) { -+ // Apply filters. -+ applyOppFilters(TuningOpps); -+ if (!TuningOpps.empty()) { -+ if (Error E = AutoTuningRemarkManager::dump( -+ *this, OutputOppDir, OutputFormat, RemarksPasses)) { -+ errs() << "Error generating auto-tuning opportunities.\n"; -+ return E; -+ } -+ } -+ -+ // Clear these two global lists when ending the auto-tuning -+ // in case of redundant information -+ TuningOpps.clear(); -+ } -+ return Error::success(); -+} -+ -+template -+bool AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, -+ T &Value) const { -+ bool Found = GlobalParams.findByName(ParamsName, Value); -+ if (Found) { -+ LLVM_DEBUG(dbgs() << "Global Variable " << ParamsName << " is set.\n"); -+ } -+ return Found; -+} -+ -+template bool -+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, -+ int &Value) const; -+template bool -+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, -+ bool &Value) const; -+template bool -+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName, -+ std::string &Value) const; -+template bool AutoTuningEngine::lookUpGlobalParams>( -+ const std::string &ParamsName, std::vector &Value) const; -+ -+class AutoTuningEngine Engine; -+ -+} // namespace autotuning -+ -+#endif -diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp -new file mode 100644 -index 000000000000..3e0506e534c4 ---- /dev/null -+++ b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp -@@ -0,0 +1,299 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===- llvm/AutoTuner/AutoTuningRemarkManager.cpp - Remark Manager --------===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file contains the implementation of for inputting and outputting remarks -+// for AutoTuning. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/AutoTuner/AutoTuningRemarkManager.h" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" -+#include "llvm/IR/DebugInfoMetadata.h" -+#include "llvm/IR/LLVMRemarkStreamer.h" -+#include "llvm/Remarks/Remark.h" -+#include "llvm/Remarks/RemarkFormat.h" -+#include "llvm/Remarks/RemarkParser.h" -+#include "llvm/Remarks/RemarkSerializer.h" -+#include "llvm/Remarks/RemarkStreamer.h" -+#include "llvm/Support/CommandLine.h" -+#include "llvm/Support/Debug.h" -+#include "llvm/Support/FileSystem.h" -+#include "llvm/Support/MemoryBuffer.h" -+#include "llvm/Support/Path.h" -+#include "llvm/Support/ToolOutputFile.h" -+ -+// Enable debug messages for AutoTuner. -+#define DEBUG_TYPE "autotuning" -+ -+using namespace llvm; -+using namespace autotuning; -+ -+// Helper functions. -+namespace { -+// Convert string into CodeRegionType. -+Expected StringToCodeRegionType(const std::string &CRType) { -+ if (CRType == "machine_basic_block") -+ return autotuning::CodeRegionType::MachineBasicBlock; -+ else if (CRType == "loop") -+ return autotuning::CodeRegionType::Loop; -+ else if (CRType == "function") -+ return autotuning::CodeRegionType::Function; -+ else if (CRType == "callsite") -+ return autotuning::CodeRegionType::CallSite; -+ else if (CRType == "llvm-param") -+ return autotuning::CodeRegionType::LLVMParam; -+ else if (CRType == "program-param") -+ return autotuning::CodeRegionType::ProgramParam; -+ else if (CRType == "switch") -+ return autotuning::CodeRegionType::Switch; -+ else if (CRType == "other") -+ return autotuning::CodeRegionType::Other; -+ else -+ return make_error("Unsupported CodeRegionType:" + CRType, -+ inconvertibleErrorCode()); -+} -+ -+// Remark -> autotuning::ParameterManager -+ParameterManager RemarkToParameterManager(const remarks::Remark &Remark) { -+ // Create Parameters from a remark. -+ ParameterManager ParamManager; -+ for (const remarks::Argument &Arg : Remark.Args) { -+ int Value = 0; -+ if (!Arg.Val.getAsInteger(10, Value)) -+ // If no errors -+ ParamManager.add(Arg.Key.str(), Value); -+ else if (Arg.Val == "true") -+ ParamManager.add(Arg.Key.str(), true); -+ else if (Arg.Val == "false") -+ ParamManager.add(Arg.Key.str(), false); -+ // If there is a value of vector type -+ else if (Arg.VectorVal) { -+ std::vector Strings; -+ for (const StringRef &Val : *Arg.VectorVal) { -+ Strings.push_back(Val.str()); -+ } -+ ParamManager.add(Arg.Key.str(), Strings); -+ } else -+ // Add as String Value -+ ParamManager.add(Arg.Key.str(), Arg.Val); -+ } -+ -+ return ParamManager; -+} -+ -+// Remark -> std::unordered_map -+std::unordered_map -+RemarkToStringMap(const remarks::Remark &Remark) { -+ std::unordered_map LLVMParams; -+ for (const remarks::Argument &Arg : Remark.Args) { -+ // Add as String Value -+ LLVMParams[Arg.Key.str()] = Arg.Val.str(); -+ } -+ return LLVMParams; -+} -+ -+// Remark -> autotuning::SourceLocation -+SourceLocation RemarkToSourceLocation(const remarks::Remark &Remark) { -+ SourceLocation Location; -+ if (Remark.Loc) { -+ StringRef File = Remark.Loc->SourceFilePath; -+ unsigned Line = Remark.Loc->SourceLine; -+ unsigned Column = Remark.Loc->SourceColumn; -+ Location = {File.str(), Line, Column}; -+ } -+ return Location; -+} -+ -+// Remark -> autotuning::CodeRegion -+CodeRegion RemarkToCodeRegion(const remarks::Remark &Remark, -+ Expected &Type) { -+ // Create a SourceLocation from a remark. -+ SourceLocation Location = RemarkToSourceLocation(Remark); -+ // Create a CodeRegion from a remark. -+ CodeRegion CR = CodeRegion(Remark.RemarkName.str(), Remark.FunctionName.str(), -+ Remark.PassName.str(), Type.get(), Location); -+ if (Remark.CodeRegionHash) -+ CR.setHash(Remark.CodeRegionHash.value_or(0)); -+ if (Remark.Invocation) -+ CR.setInvocation(Remark.Invocation.value_or(0)); -+ -+ return CR; -+} -+ -+Expected> emitAutoTuningRemarks( -+ const StringRef RemarksFilename, const StringRef RemarksFormat, -+ const StringRef RemarksPasses, const CodeRegions &CRList) { -+ if (RemarksFilename.empty()) -+ return nullptr; -+ // Parse remark format. Options are yaml, yaml-strtab and bitstream. -+ Expected Format = remarks::parseFormat(RemarksFormat); -+ if (Error E = Format.takeError()) -+ return make_error(std::move(E)); -+ -+ std::error_code EC; -+ auto Flags = -+ *Format == remarks::Format::YAML ? sys::fs::OF_Text : sys::fs::OF_None; -+ auto RemarksFile = -+ std::make_unique(RemarksFilename, EC, Flags); -+ if (EC) -+ return make_error(errorCodeToError(EC)); -+ // Create a remark serializer to emit code regions. -+ Expected> RemarkSerializer = -+ remarks::createRemarkSerializer( -+ *Format, remarks::SerializerMode::Separate, RemarksFile->os()); -+ -+ if (Error E = RemarkSerializer.takeError()) -+ return make_error(std::move(E)); -+ // Create remark streamer based on the serializer. -+ remarks::RemarkStreamer RStreamer = -+ remarks::RemarkStreamer(std::move(*RemarkSerializer), RemarksFilename); -+ AutoTuningRemarkStreamer Streamer(RStreamer); -+ -+ if (!RemarksPasses.empty()) -+ if (Error E = Streamer.setFilter(RemarksPasses)) -+ return make_error(std::move(E)); -+ // Emit CodeRegions in Remark format. -+ for (const CodeRegion &CR : CRList) { -+ Streamer.emit(CR); -+ } -+ return std::move(RemarksFile); -+} -+} // namespace -+ -+llvm::Error AutoTuningRemarkManager::read(AutoTuningEngine &E, -+ const std::string &InputFileName, -+ const std::string &RemarksFormat) { -+ ErrorOr> Buf = -+ MemoryBuffer::getFile(InputFileName.c_str()); -+ if (std::error_code EC = Buf.getError()) -+ return make_error( -+ "Can't open file " + InputFileName + ": " + EC.message(), EC); -+ // Parse remark format. Options are yaml, yaml-strtab and bitstream. -+ Expected Format = remarks::parseFormat(RemarksFormat); -+ if (!Format) -+ return Format.takeError(); -+ -+ Expected> MaybeParser = -+ remarks::createRemarkParserFromMeta(*Format, (*Buf)->getBuffer()); -+ if (!MaybeParser) { -+ return MaybeParser.takeError(); -+ } -+ remarks::RemarkParser &Parser = **MaybeParser; -+ -+ while (true) { -+ Expected> MaybeRemark = Parser.next(); -+ if (!MaybeRemark) { -+ Error E = MaybeRemark.takeError(); -+ if (E.isA()) { -+ // EOF. -+ consumeError(std::move(E)); -+ break; -+ } -+ return E; -+ } -+ const remarks::Remark &Remark = **MaybeRemark; -+ -+ if (Remark.RemarkType != remarks::Type::AutoTuning) -+ continue; -+ -+ if (!Remark.CodeRegionType) -+ return make_error("CodeRegionType field is missing.", -+ inconvertibleErrorCode()); -+ Expected Type = -+ StringToCodeRegionType((*Remark.CodeRegionType).str()); -+ if (!Type) -+ return Type.takeError(); -+ CodeRegionType CRType = Type.get(); -+ // If CodeRegionType is Other, this remark corresponds to global -+ // parameters, and no need to create a CodeRegion object. Check if the -+ // Remark of global parameters is for the current Module. -+ if (CRType == autotuning::Other && Remark.RemarkName == Engine.ModuleID) { -+ Engine.GlobalParams = RemarkToParameterManager(Remark); -+ continue; -+ } -+ if (CRType == autotuning::LLVMParam && -+ Remark.RemarkName == Engine.ModuleID) { -+ Engine.LLVMParams = RemarkToStringMap(Remark); -+ continue; -+ } -+ if (CRType == autotuning::ProgramParam && -+ Remark.RemarkName == Engine.ModuleID) { -+ Engine.ProgramParams = RemarkToStringMap(Remark); -+ continue; -+ } -+ if (Engine.isThinLTOTuning() && -+ (CRType == autotuning::CallSite || CRType == autotuning::Loop || -+ CRType == autotuning::MachineBasicBlock || -+ CRType == autotuning::Function)) { -+ LLVM_DEBUG(dbgs() << "AutoTuner does not support tuning of " -+ << CodeRegion::getTypeAsString(CRType) -+ << " for thinLTO durning link-time optimization. " -+ "Ignoring current code region.\n"); -+ continue; -+ } -+ // Create a SourceLocation from a remark. -+ CodeRegion CR = RemarkToCodeRegion(Remark, Type); -+ ParameterManager ParamManager = RemarkToParameterManager(Remark); -+ // Add the CodeRegion-ParameterManager entry into LoopUpTable. -+ Engine.ParamTable[CR] = ParamManager; -+ -+ std::string Filename = CR.getSourceLoc().SourceFilePath; -+ size_t Pos = Filename.rfind("."); -+ if (Pos != std::string::npos) -+ Filename.erase(Pos, Filename.size()); -+ Engine.OppPassList[Filename].insert(CR.getPassName()); -+ Engine.CodeRegionFilterTypes.insert(CR.getType()); -+ } -+ return Error::success(); -+} -+ -+Error AutoTuningRemarkManager::dump(const autotuning::AutoTuningEngine &E, -+ const std::string &DirName, -+ const std::string &RemarksFormat, -+ const std::string &RemarksPasses) { -+ // Change to absolute path. -+ SmallString<256> OutputPath = StringRef(DirName); -+ sys::fs::make_absolute(OutputPath); -+ -+ // Make sure the new output directory exists, creating it if necessary. -+ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { -+ return make_error("could not create directory: " + -+ Twine(OutputPath) + ": " + EC.message(), -+ EC); -+ } -+ if (!Engine.TuningOpps.empty()) { -+ StringRef ModelFileName = sys::path::filename(Engine.ModuleID); -+ sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat); -+ -+ int i = 1; // Output file suffix starts from 1. -+ // Check all exiting xml files xml.1...i and create a new file -+ // suffix.(i+1). -+ while (sys::fs::exists(OutputPath)) { -+ sys::path::remove_filename(OutputPath); -+ sys::path::append(OutputPath, -+ ModelFileName + "." + RemarksFormat + "." + Twine(i)); -+ i += 1; -+ } -+ Expected> RemarksFileOrErr = -+ emitAutoTuningRemarks(OutputPath, RemarksFormat, RemarksPasses, -+ Engine.TuningOpps); -+ if (Error E = RemarksFileOrErr.takeError()) { -+ return E; -+ } -+ -+ std::unique_ptr RemarksFile = std::move(*RemarksFileOrErr); -+ if (RemarksFile) -+ RemarksFile->keep(); -+ } -+ return Error::success(); -+} -+ -+#endif -diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp -new file mode 100644 -index 000000000000..0516c055a139 ---- /dev/null -+++ b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp -@@ -0,0 +1,55 @@ -+#if defined(ENABLE_AUTOTUNER) -+// ===---------- llvm/AutoTuner/AutoTuningRemarkStreamer.cpp --------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. -+// -+// ===---------------------------------------------------------------------===// -+// -+// This file contains the implementation of the conversion between AutoTuner -+// CodeRegions and serializable remarks::Remark objects. -+// -+// ===---------------------------------------------------------------------===// -+ -+#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" -+ -+using namespace llvm; -+ -+// autotuning::CodeRegion -> Remark -+remarks::Remark -+AutoTuningRemarkStreamer::toRemark(const autotuning::CodeRegion &CR) { -+ remarks::Remark R; // The result. -+ R.RemarkType = remarks::Type::AutoTuning; -+ R.PassName = CR.getPassName(); -+ R.RemarkName = CR.getName(); -+ R.FunctionName = CR.getFuncName(); -+ const autotuning::SourceLocation &Location = CR.getSourceLoc(); -+ if (Location) -+ R.Loc = remarks::RemarkLocation{Location.SourceFilePath, -+ Location.SourceLine, Location.SourceColumn}; -+ R.CodeRegionType = CR.getTypeAsString(); -+ R.CodeRegionHash = CR.getHash(); -+ R.AutoTunerOptions = CR.getAutoTunerOptions(); -+ R.Invocation = CR.getInvocation(); -+ R.BaselineConfig = CR.getBaselineConfig(); -+ return R; -+} -+ -+void AutoTuningRemarkStreamer::emit(const autotuning::CodeRegion &CR) { -+ if (!RS.matchesFilter(CR.getPassName())) -+ return; -+ -+ // First, convert the code region to a remark. -+ remarks::Remark R = toRemark(CR); -+ // Then, emit the remark through the serializer. -+ RS.getSerializer().emit(R); -+} -+ -+Error AutoTuningRemarkStreamer::setFilter(StringRef Filter) { -+ return RS.setFilter(Filter); -+} -+#endif -diff --git a/llvm/lib/AutoTuner/CMakeLists.txt b/llvm/lib/AutoTuner/CMakeLists.txt -new file mode 100644 -index 000000000000..c618474fe5ae ---- /dev/null -+++ b/llvm/lib/AutoTuner/CMakeLists.txt -@@ -0,0 +1,11 @@ -+add_llvm_component_library(LLVMAutoTuner -+ AutoTuning.cpp -+ AutoTuningRemarkManager.cpp -+ AutoTuningRemarkStreamer.cpp -+ -+ ADDITIONAL_HEADER_DIRS -+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/AutoTuner -+ -+ LINK_COMPONENTS -+ Remarks -+) -\ No newline at end of file -diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt -index 283baa6090eb..966137c0f71f 100644 ---- a/llvm/lib/CMakeLists.txt -+++ b/llvm/lib/CMakeLists.txt -@@ -28,6 +28,7 @@ add_subdirectory(Object) - add_subdirectory(ObjectYAML) - add_subdirectory(Option) - add_subdirectory(Remarks) -+add_subdirectory(AutoTuner) - add_subdirectory(Debuginfod) - add_subdirectory(DebugInfo) - add_subdirectory(DWP) -diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt -index 106571b9061b..9029dc7bb3d9 100644 ---- a/llvm/lib/CodeGen/CMakeLists.txt -+++ b/llvm/lib/CodeGen/CMakeLists.txt -@@ -273,6 +273,7 @@ add_llvm_component_library(LLVMCodeGen - - LINK_COMPONENTS - Analysis -+ AutoTuner - BitReader - BitWriter - CodeGenTypes -diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp -index 5a005ba7b414..9dcb3833ab91 100644 ---- a/llvm/lib/CodeGen/CalcSpillWeights.cpp -+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp -@@ -29,6 +29,24 @@ using namespace llvm; - - #define DEBUG_TYPE "calcspillweights" - -+#if defined(ENABLE_AUTOTUNER) -+static cl::opt LoopWeight( -+ "reg-spill-loop-weight", cl::Hidden, -+ cl::desc( -+ "Tunable extra weight to what looks like a loop induction variable"), -+ cl::init(3)); -+ -+static cl::opt RemaWeight( -+ "reg-spill-rematerialize-weight", cl::Hidden, -+ cl::desc("Tunable reduced weight giving re-materialize oppotunities"), -+ cl::init(0.5f)); -+ -+static cl::opt -+ HintWeight("reg-spill-hint-weight", cl::Hidden, -+ cl::desc("Tunable weakly boost weight of hinted registers"), -+ cl::init(1.01f)); -+#endif -+ - void VirtRegAuxInfo::calculateSpillWeightsAndHints() { - LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " << MF.getName() << '\n'); -@@ -252,7 +270,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, - - // Give extra weight to what looks like a loop induction variable update. - if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) -+#if defined(ENABLE_AUTOTUNER) -+ Weight *= LoopWeight; -+#else - Weight *= 3; -+#endif - - TotalWeight += Weight; - } -@@ -288,7 +310,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, - } - - // Weakly boost the spill weight of hinted registers. -+#if defined(ENABLE_AUTOTUNER) -+ TotalWeight *= HintWeight; -+#else - TotalWeight *= 1.01F; -+#endif - } - - // If the live interval was already unspillable, leave it that way. -@@ -315,7 +341,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo())) -+#if defined(ENABLE_AUTOTUNER) -+ TotalWeight *= RemaWeight; -+#else - TotalWeight *= 0.5F; -+#endif - - if (IsLocalSplitArtifact) - return normalize(TotalWeight, Start->distance(*End), NumInstr); -diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp -index 231544494c32..327cd40f86a4 100644 ---- a/llvm/lib/CodeGen/MachineBasicBlock.cpp -+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp -@@ -37,6 +37,9 @@ - #include "llvm/Support/raw_ostream.h" - #include "llvm/Target/TargetMachine.h" - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/IR/StructuralHash.h" -+#endif - #include - using namespace llvm; - -@@ -1703,6 +1706,39 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { - return LiveIns.begin(); - } - -+#if defined(ENABLE_AUTOTUNER) -+uint64_t MachineBasicBlock::computeStructuralHash() { -+ return StructuralHash(*this); -+} -+ -+void MachineBasicBlock::initCodeRegion() { -+ std::string BasicBlockName = -+ ("%bb." + Twine(this->getNumber()) + ":" + this->getName()).str(); -+ MachineFunction *MF = this->getParent(); -+ StringRef FuncName = MF->getName(); -+ -+ autotuning::CodeRegion CR; -+ if (!this->empty()) { -+ const DebugLoc &StartLoc = this->front().getDebugLoc(); -+ CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), -+ autotuning::CodeRegionType::MachineBasicBlock, -+ StartLoc); -+ } else { -+ CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), -+ autotuning::CodeRegionType::MachineBasicBlock); -+ } -+ // Compute the number of non-debug IR instructions in this MBB. -+ unsigned NumInstrs = std::distance(this->getFirstNonDebugInstr(), -+ this->getLastNonDebugInstr()); -+ CR.setSize(NumInstrs); -+ // Compute hotness. -+ autotuning::HotnessType Hotness = MF->getFunction().ATEFunction.getHotness(); -+ CR.setHotness(Hotness); -+ -+ this->setCodeRegion(CR); -+} -+#endif -+ - MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { - const MachineFunction &MF = *getParent(); - assert(MF.getProperties().hasProperty( -diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp -index ba5432459d12..caccc9e5fad4 100644 ---- a/llvm/lib/CodeGen/MachineScheduler.cpp -+++ b/llvm/lib/CodeGen/MachineScheduler.cpp -@@ -569,6 +569,12 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, - for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); - MBB != MBBEnd; ++MBB) { - -+#if defined(ENABLE_AUTOTUNER) -+ // before visiting this MBB -+ // if AutoTuning is enabled, initialize this MBB for auto-tuning -+ autotuning::Engine.initContainer(&*MBB, DEBUG_TYPE); -+#endif -+ - Scheduler.startBlock(&*MBB); - - #ifndef NDEBUG -@@ -3244,6 +3250,44 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, - RegionPolicy.ShouldTrackLaneMasks = false; - } - -+#if defined(ENABLE_AUTOTUNER) -+ // AUTO-TUNING - Look up for MMB level scheduling direction if AutoTuning is -+ // enabled -+ if (autotuning::Engine.isEnabled()) { -+ MachineBasicBlock &MBB = *Begin->getParent(); -+ -+ bool NewForceBottomUp = false; -+ // Look up from xml file, and overwrite values -+ bool IsForceBottomUpSet = -+ MBB.lookUpParams("ForceBottomUp", NewForceBottomUp); -+ -+ bool NewForceForceTopDown = false; -+ bool IsForceTopDownSet = -+ MBB.lookUpParams("ForceTopDown", NewForceForceTopDown); -+ -+ assert((!NewForceBottomUp || !NewForceForceTopDown) && -+ "BottomUp and TopDown cannot both set to true"); -+ -+ if (IsForceBottomUpSet) { -+ RegionPolicy.OnlyBottomUp = NewForceBottomUp; -+ if (RegionPolicy.OnlyBottomUp) { -+ RegionPolicy.OnlyTopDown = false; -+ } -+ } -+ -+ if (IsForceTopDownSet) { -+ RegionPolicy.OnlyTopDown = NewForceForceTopDown; -+ if (RegionPolicy.OnlyTopDown) { -+ RegionPolicy.OnlyBottomUp = false; -+ } -+ } -+ -+ if (IsForceBottomUpSet || IsForceTopDownSet) { -+ return; -+ } -+ } -+#endif -+ - // Check -misched-topdown/bottomup can force or unforce scheduling direction. - // e.g. -misched-bottomup=false allows scheduling in both directions. - assert((!ForceTopDown || !ForceBottomUp) && -diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp -index 36a02d5beb4b..d4ac95d534ed 100644 ---- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp -+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp -@@ -16,6 +16,9 @@ - #include "llvm/CodeGen/MachineJumpTableInfo.h" - #include "llvm/CodeGen/TargetLowering.h" - #include "llvm/Target/TargetMachine.h" -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - using namespace llvm; - using namespace SwitchCG; -@@ -61,7 +64,23 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, - if (!TLI->areJTsAllowed(SI->getParent()->getParent())) - return; - -+#if defined(ENABLE_AUTOTUNER) -+ unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); -+ // Overwrite MinJumpTableEntries when it is set by Autotuner -+ if (autotuning::Engine.isEnabled()) { -+ autotuning::Engine.initContainer(SI->ATESwitchInst.get(), -+ "switch-lowering"); -+ -+ int NewValue = 0; // the int value is set by lookUpParams() -+ bool Changed = -+ SI->ATESwitchInst->lookUpParams("MinJumpTableEntries", NewValue); -+ if (Changed) -+ MinJumpTableEntries = NewValue; -+ } -+#else - const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); -+#endif -+ - const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - - // Bail if not enough cases. -diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp -index df753b91ff90..af77e6c2dc4d 100644 ---- a/llvm/lib/IR/AsmWriter.cpp -+++ b/llvm/lib/IR/AsmWriter.cpp -@@ -2602,11 +2602,21 @@ public: - void writeAllAttributeGroups(); - - void printTypeIdentities(); -+#if defined(ENABLE_AUTOTUNER) -+ void printGlobal(const GlobalVariable *GV, bool PrintDeclarationOnly = false); -+ void printAlias(const GlobalAlias *GA); -+ void printIFunc(const GlobalIFunc *GI); -+ void printComdat(const Comdat *C); -+ void printRequisiteDeclarations(const Function *F); -+ void printFunction(const Function *F, bool PrintCompleteIR = false, -+ bool PrintDeclarationOnly = false); -+#else - void printGlobal(const GlobalVariable *GV); - void printAlias(const GlobalAlias *GA); - void printIFunc(const GlobalIFunc *GI); - void printComdat(const Comdat *C); - void printFunction(const Function *F); -+#endif - void printArgument(const Argument *FA, AttributeSet Attrs); - void printBasicBlock(const BasicBlock *BB); - void printInstructionLine(const Instruction &I); -@@ -3593,15 +3603,26 @@ static void maybePrintComdat(formatted_raw_ostream &Out, - Out << ')'; - } - -+#if defined(ENABLE_AUTOTUNER) -+void AssemblyWriter::printGlobal(const GlobalVariable *GV, -+ bool PrintDeclarationOnly) { -+ if (GV->isMaterializable() && !PrintDeclarationOnly) -+#else - void AssemblyWriter::printGlobal(const GlobalVariable *GV) { - if (GV->isMaterializable()) -+#endif - Out << "; Materializable\n"; - - AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent()); - WriteAsOperandInternal(Out, GV, WriterCtx); - Out << " = "; - -+#if defined(ENABLE_AUTOTUNER) -+ if ((!GV->hasInitializer() || PrintDeclarationOnly) && -+ GV->hasExternalLinkage()) -+#else - if (!GV->hasInitializer() && GV->hasExternalLinkage()) -+#endif - Out << "external "; - - Out << getLinkageNameWithSpace(GV->getLinkage()); -@@ -3619,7 +3640,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { - Out << (GV->isConstant() ? "constant " : "global "); - TypePrinter.print(GV->getValueType(), Out); - -+#if defined(ENABLE_AUTOTUNER) -+ if (GV->hasInitializer() && !PrintDeclarationOnly) { -+#else - if (GV->hasInitializer()) { -+#endif - Out << ' '; - writeOperand(GV->getInitializer(), false); - } -@@ -3769,12 +3794,102 @@ void AssemblyWriter::printTypeIdentities() { - } - } - -+#if defined(ENABLE_AUTOTUNER) -+/// printRequisiteDeclarations - Print the declarations of type identities, -+/// global variables, functions, and function attribute groups of a function. -+void AssemblyWriter::printRequisiteDeclarations(const Function *F) { -+ // walk through instructions and collect global variables & functions -+ SmallPtrSet GVs; -+ SmallPtrSet Functions; -+ for (const BasicBlock &BB : *F) { -+ for (const Instruction &I : BB) { -+ // Check for function -+ if (const auto *CI = dyn_cast(&I)) { -+ Function *func = CI->getCalledFunction(); -+ if (func) -+ Functions.insert(func); -+ } -+ // Check for global variables -+ for (const Use &U : I.operands()) { -+ if (GlobalVariable *gv = dyn_cast(U)) -+ GVs.insert(gv); -+ if (GEPOperator *gepo = dyn_cast(&U)) { -+ if (GlobalVariable *gv = -+ dyn_cast(gepo->getPointerOperand())) -+ GVs.insert(gv); -+ for (auto it = gepo->idx_begin(), et = gepo->idx_end(); it != et; -+ ++it) { -+ if (GlobalVariable *gv = dyn_cast(*it)) -+ GVs.insert(gv); -+ } -+ } -+ } -+ } -+ } -+ -+ // print type identities -+ printTypeIdentities(); -+ -+ // print global variables -+ if (!GVs.empty()) { -+ Out << '\n'; -+ for (auto GVit = GVs.begin(), et = GVs.end(); GVit != et; ++GVit) { -+ // Make backups of some properties. They may be modified for printing. -+ GlobalValue::LinkageTypes SavedLinkage = (*GVit)->getLinkage(); -+ GlobalVariable::VisibilityTypes SavedVisibility = -+ (*GVit)->getVisibility(); -+ -+ // modify property if needed -+ if (!(*GVit)->hasAvailableExternallyLinkage() && -+ !((*GVit)->getName() == "llvm.global_ctors") && -+ (*GVit)->hasLocalLinkage()) { -+ (*GVit)->setLinkage(GlobalValue::ExternalLinkage); -+ (*GVit)->setVisibility(GlobalValue::HiddenVisibility); -+ } -+ -+ printGlobal(*GVit, true); -+ Out << '\n'; -+ -+ // restore backups -+ (*GVit)->setLinkage(SavedLinkage); -+ (*GVit)->setVisibility(SavedVisibility); -+ } -+ Out << '\n'; -+ } -+ -+ // print functions -+ for (auto FuncIt = Functions.begin(), et = Functions.end(); FuncIt != et; -+ ++FuncIt) { -+ Out << '\n'; -+ printFunction(*FuncIt, false, true); -+ } -+ -+ // Write attribute groups. -+ if (!Machine.as_empty()) { -+ Out << '\n'; -+ writeAllAttributeGroups(); -+ } -+ Out << '\n'; -+} -+ - /// printFunction - Print all aspects of a function. -+void AssemblyWriter::printFunction(const Function *F, bool PrintCompleteIR, -+ bool PrintDeclarationOnly) { -+ if (PrintCompleteIR && !PrintDeclarationOnly) { -+ printRequisiteDeclarations(F); -+ } -+ if (AnnotationWriter && !PrintDeclarationOnly) -+ AnnotationWriter->emitFunctionAnnot(F, Out); -+ -+ if (F->isMaterializable() && !PrintDeclarationOnly) -+ Out << "; Materializable\n"; -+#else - void AssemblyWriter::printFunction(const Function *F) { - if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); - - if (F->isMaterializable()) - Out << "; Materializable\n"; -+#endif - - const AttributeList &Attrs = F->getAttributes(); - if (Attrs.hasFnAttrs()) { -@@ -3792,6 +3907,18 @@ void AssemblyWriter::printFunction(const Function *F) { - Out << "; Function Attrs: " << AttrStr << '\n'; - } - -+#if defined(ENABLE_AUTOTUNER) -+ if (!PrintDeclarationOnly) -+ Machine.incorporateFunction(F); -+ -+ if (F->isDeclaration() || PrintDeclarationOnly) { -+ Out << "declare"; -+ if (!PrintDeclarationOnly) { -+ SmallVector, 4> MDs; -+ F->getAllMetadata(MDs); -+ printMetadataAttachments(MDs, " "); -+ } -+#else - Machine.incorporateFunction(F); - - if (F->isDeclaration()) { -@@ -3799,6 +3926,7 @@ void AssemblyWriter::printFunction(const Function *F) { - SmallVector, 4> MDs; - F->getAllMetadata(MDs); - printMetadataAttachments(MDs, " "); -+#endif - Out << ' '; - } else - Out << "define "; -@@ -3824,7 +3952,11 @@ void AssemblyWriter::printFunction(const Function *F) { - Out << '('; - - // Loop over the arguments, printing them... -+#if defined(ENABLE_AUTOTUNER) -+ if ((F->isDeclaration() && !IsForDebug) || PrintDeclarationOnly) { -+#else - if (F->isDeclaration() && !IsForDebug) { -+#endif - // We're only interested in the type here - don't print argument names. - for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { - // Insert commas as we go... the first arg doesn't get a comma -@@ -3895,7 +4027,11 @@ void AssemblyWriter::printFunction(const Function *F) { - writeOperand(F->getPersonalityFn(), /*PrintType=*/true); - } - -+#if defined(ENABLE_AUTOTUNER) -+ if (F->isDeclaration() || PrintDeclarationOnly) { -+#else - if (F->isDeclaration()) { -+#endif - Out << '\n'; - } else { - SmallVector, 4> MDs; -@@ -3913,6 +4049,13 @@ void AssemblyWriter::printFunction(const Function *F) { - Out << "}\n"; - } - -+#if defined(ENABLE_AUTOTUNER) -+ // Output metadata -+ if (!Machine.mdn_empty() && PrintCompleteIR && !PrintDeclarationOnly) { -+ Out << '\n'; -+ writeAllMDNodes(); -+ } -+#endif - Machine.purgeFunction(); - } - -@@ -4591,13 +4734,21 @@ void AssemblyWriter::printUseLists(const Function *F) { - - void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, - bool ShouldPreserveUseListOrder, -+#if defined(ENABLE_AUTOTUNER) -+ bool IsForDebug, bool PrintCompleteIR) const { -+#else - bool IsForDebug) const { -+#endif - SlotTracker SlotTable(this->getParent()); - formatted_raw_ostream OS(ROS); - AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, - IsForDebug, - ShouldPreserveUseListOrder); -+#if defined(ENABLE_AUTOTUNER) -+ W.printFunction(this, PrintCompleteIR); -+#else - W.printFunction(this); -+#endif - } - - void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, -diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt -index 217fe703dd4e..d44d1eea9f3e 100644 ---- a/llvm/lib/IR/CMakeLists.txt -+++ b/llvm/lib/IR/CMakeLists.txt -@@ -78,6 +78,7 @@ add_llvm_component_library(LLVMCore - intrinsics_gen - - LINK_COMPONENTS -+ AutoTuner - BinaryFormat - Demangle - Remarks -diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp -index 435800d9e5f9..ec2620efac38 100644 ---- a/llvm/lib/IR/Function.cpp -+++ b/llvm/lib/IR/Function.cpp -@@ -70,6 +70,10 @@ - #include - #include - -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/IR/StructuralHash.h" -+#endif -+ - using namespace llvm; - using ProfileCount = Function::ProfileCount; - -@@ -1977,6 +1981,36 @@ std::optional Function::getSectionPrefix() const { - return std::nullopt; - } - -+#if defined(ENABLE_AUTOTUNER) -+uint64_t AutoTuningEnabledFunction::computeStructuralHash() { -+ return StructuralHash(*(this->Func)); -+} -+ -+void AutoTuningEnabledFunction::initCodeRegion() { -+ StringRef FuncName = Func->getName(); -+ StringRef EntryBBName; -+ autotuning::SourceLocation Loc; -+ -+ if (!Func->empty()) -+ EntryBBName = Func->front().getName(); -+ else -+ EntryBBName = StringRef("None"); -+ -+ DISubprogram *SubProgram = Func->getSubprogram(); -+ if (SubProgram) -+ // Set the column number to 0 because there is no information about -+ // column number for functions. -+ Loc = {SubProgram->getFilename().str(), SubProgram->getLine(), 0}; -+ -+ autotuning::CodeRegion CR = -+ autotuning::CodeRegion(EntryBBName.data(), FuncName.data(), -+ autotuning::CodeRegionType::Function, Loc); -+ CR.setSize(Func->getInstructionCount()); -+ CR.setHotness(this->getHotness()); -+ this->setCodeRegion(CR); -+} -+#endif -+ - bool Function::nullPointerIsDefined() const { - return hasFnAttribute(Attribute::NullPointerIsValid); - } -diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp -index cb0ac0f8eae6..e614285df07a 100644 ---- a/llvm/lib/IR/Instructions.cpp -+++ b/llvm/lib/IR/Instructions.cpp -@@ -45,6 +45,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/IR/StructuralHash.h" -+#endif - - using namespace llvm; - -@@ -259,6 +262,89 @@ void LandingPadInst::addClause(Constant *Val) { - getOperandList()[OpNo] = Val; - } - -+#if defined(ENABLE_AUTOTUNER) -+uint64_t AutoTuningEnabledSwitchInst::computeStructuralHash() { -+ return StructuralHash(*(this->SI)); -+} -+ -+void AutoTuningEnabledSwitchInst::initCodeRegion() { -+ std::string SwitchName; -+ if (this->SI->hasName()) { -+ SwitchName = this->SI->getName().str(); -+ } else { -+ std::string Str; -+ llvm::raw_string_ostream RSO(Str); -+ this->SI->getCondition()->printAsOperand(RSO); -+ SwitchName = RSO.str(); -+ } -+ -+ autotuning::CodeRegion CR = autotuning::CodeRegion( -+ SwitchName, this->SI->getFunction()->getName().str(), -+ autotuning::CodeRegionType::Switch, this->SI->getDebugLoc()); -+ -+ unsigned TotalNumInsts = 0; -+ for (auto Case : SI->cases()) { -+ const BasicBlock *BB = Case.getCaseSuccessor(); -+ unsigned NumInsts = std::distance(BB->instructionsWithoutDebug().begin(), -+ BB->instructionsWithoutDebug().end()); -+ TotalNumInsts += NumInsts; -+ } -+ -+ CR.setSize(TotalNumInsts); -+ // Compute hotness. -+ autotuning::HotnessType Hotness = -+ this->SI->getFunction()->ATEFunction.getHotness(); -+ CR.setHotness(Hotness); -+ -+ this->setCodeRegion(CR); -+} -+ -+uint64_t AutoTuningEnabledCallSite::computeStructuralHash() { -+ return StructuralHash(*(this->CB)); -+} -+ -+void AutoTuningEnabledCallSite::initCodeRegion() { -+ // Use Caller's name as FuncName and Callee's name as Name of a CodeRegion. -+ Function *Caller = this->CB->getCaller(); -+ Function *Callee = this->CB->getCalledFunction(); -+ if (Caller == nullptr || Callee == nullptr) { -+ this->setCodeRegion(autotuning::CodeRegion::getInvalidInstance()); -+ return; -+ } -+ -+ autotuning::SourceLocation SrcLoc; -+ if (this->CB->getDebugLoc()) { -+ unsigned int SourceLine = this->CB->getDebugLoc()->getLine(); -+ // Get modified source line number for current callsite if there is another -+ // call instruction (to same callee) which has same source line number -+ // happened due to inlining. -+ std::optional LineNum = autotuning::Engine.getCallSiteLoc(CB); -+ if (LineNum) -+ SourceLine = *LineNum; -+ SrcLoc = autotuning::SourceLocation{ -+ this->CB->getDebugLoc()->getFilename().str(), SourceLine, -+ this->CB->getDebugLoc()->getColumn()}; -+ } -+ -+ // We are using DebugLoc to distinguish between multiple calls to the same -+ // callee in a function. It may be possible that these multiple calls have -+ // same DebugLoc either 1) due to inlining of multiple calls (same callee) -+ // and callee having more calls, or 2) cloned calls added by previous -+ // optimizations. We are using 'callee name + it's parent (basic block) name' -+ // to solve these problems. Additionally we are using modified line number -+ // for the issue # 1; this will handle the cases where the multiple calls are -+ // in the same basic block. -+ autotuning::CodeRegion CR = autotuning::CodeRegion( -+ Callee->getName().str() + "-" + this->CB->getParent()->getName().str(), -+ Caller->getName().data(), autotuning::CodeRegionType::CallSite, SrcLoc, -+ autotuning::DynamicOptions{{"ForceInline", {0, 1}}}); -+ -+ CR.setSize(Callee->getInstructionCount()); -+ CR.setHotness(Caller->ATEFunction.getHotness()); -+ this->setCodeRegion(CR); -+} -+#endif -+ - //===----------------------------------------------------------------------===// - // CallBase Implementation - //===----------------------------------------------------------------------===// -diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp -index 6ea108d831a1..1583e1c82b3e 100644 ---- a/llvm/lib/IR/StructuralHash.cpp -+++ b/llvm/lib/IR/StructuralHash.cpp -@@ -10,9 +10,23 @@ - #include "llvm/IR/Function.h" - #include "llvm/IR/GlobalVariable.h" - #include "llvm/IR/Module.h" -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/CodeGen/MachineBasicBlock.h" -+#include "llvm/IR/InstrTypes.h" -+#include "llvm/IR/Instructions.h" -+#include "llvm/Support/CommandLine.h" -+#endif - - using namespace llvm; - -+#if defined(ENABLE_AUTOTUNER) -+// AutoTuner Flag to use callsite Debug Location for hash cacluation. -+static cl::opt HashCallSite( -+ "hash-prior-to-callsite", cl::init(true), cl::Hidden, -+ cl::desc("Use function IR prior to a call site to compute the hashcode for" -+ " the call site")); -+#endif -+ - namespace { - - // Basic hashing mechanism to detect structural change to the IR, used to verify -@@ -21,16 +35,81 @@ namespace { - - class StructuralHashImpl { - hash_code Hash; -+#if defined(ENABLE_AUTOTUNER) -+ const uint64_t BLOCK_HEADER_HASH = 45798; -+#endif - - template void hash(const T &V) { Hash = hash_combine(Hash, V); } - - public: - StructuralHashImpl() : Hash(4) {} - -+#if defined(ENABLE_AUTOTUNER) -+ void update(const MachineBasicBlock &MBB) { -+ // Update the structural hash when we encounter a new basic block. -+ // Prevents CodeRegions with different structures, but many empty -+ // BasicBlocks to have the same structural hash. -+ if (const BasicBlock *Block = MBB.getBasicBlock()) { -+ hash(BLOCK_HEADER_HASH); // Block header -+ for (auto &Inst : *Block) -+ hash(Inst.getOpcode()); -+ } -+ } -+ -+ void update(const std::vector BBs) { -+ // Update the structural hash when we encounter a new basic block. -+ // Prevents CodeRegions with different structures, but many empty -+ // BasicBlocks to have the same structural hash. -+ for (BasicBlock *BB : BBs) { -+ if (BB == nullptr) -+ continue; -+ -+ hash(BLOCK_HEADER_HASH); // Block header -+ for (auto &Inst : *BB) -+ hash(Inst.getOpcode()); -+ } -+ } -+ -+ void update(const llvm::CallBase &CB) { -+ StringRef Name = ""; -+ if (HashCallSite) { -+ update(*CB.getCaller(), std::addressof(CB)); -+ } else { -+ const Function &F = *CB.getCaller(); -+ Name = F.getName(); -+ std::string FileName = Name.str(); -+ for (uint64_t Idx = 0; Idx < Name.size(); Idx = Idx + sizeof(uint64_t)) { -+ uint64_t Value = 0; -+ FileName.copy((char *)&Value, sizeof(uint64_t), Idx); -+ hash(Value); -+ } -+ } -+ -+ update(*CB.getCalledFunction()); -+ } -+ -+ void update(const SwitchInst &SI) { -+ hash(SI.getNumCases()); -+ for (auto Case : SI.cases()) { -+ hash(BLOCK_HEADER_HASH); -+ const BasicBlock *BB = Case.getCaseSuccessor(); -+ for (auto &Inst : *BB) -+ hash(Inst.getOpcode()); -+ } -+ } -+ -+ void update(const Function &F, const CallBase *TargetCB = nullptr) { -+ if (F.isDeclaration()) -+ return; -+ -+ const Instruction *I = -+ TargetCB ? (dyn_cast(TargetCB)) : nullptr; -+#else - void update(const Function &F) { - // Declarations don't affect analyses. - if (F.isDeclaration()) - return; -+#endif - - hash(12345); // Function header - -@@ -44,9 +123,18 @@ public: - VisitedBBs.insert(BBs[0]); - while (!BBs.empty()) { - const BasicBlock *BB = BBs.pop_back_val(); -+#if defined(ENABLE_AUTOTUNER) -+ hash(BLOCK_HEADER_HASH); // Block header -+ for (auto &Inst : *BB) { -+ hash(Inst.getOpcode()); -+ if (I && Inst.isIdenticalTo(I)) -+ return; -+ } -+#else - hash(45798); // Block header - for (auto &Inst : *BB) - hash(Inst.getOpcode()); -+#endif - - const Instruction *Term = BB->getTerminator(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { -@@ -79,6 +167,32 @@ public: - - } // namespace - -+#if defined(ENABLE_AUTOTUNER) -+uint64_t llvm::StructuralHash(const MachineBasicBlock &MBB) { -+ StructuralHashImpl H; -+ H.update(MBB); -+ return H.getHash(); -+} -+ -+uint64_t llvm::StructuralHash(const std::vector BBs) { -+ StructuralHashImpl H; -+ H.update(BBs); -+ return H.getHash(); -+} -+ -+uint64_t llvm::StructuralHash(const CallBase &CB) { -+ StructuralHashImpl H; -+ H.update(CB); -+ return H.getHash(); -+} -+ -+uint64_t llvm::StructuralHash(const SwitchInst &SI) { -+ StructuralHashImpl H; -+ H.update(SI); -+ return H.getHash(); -+} -+#endif -+ - uint64_t llvm::StructuralHash(const Function &F) { - StructuralHashImpl H; - H.update(F); -diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp -index d0cbbcc0e310..a3ccbc6d258f 100644 ---- a/llvm/lib/Passes/PassBuilder.cpp -+++ b/llvm/lib/Passes/PassBuilder.cpp -@@ -262,6 +262,11 @@ - #include "llvm/Transforms/Vectorize/VectorCombine.h" - #include - -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/Analysis/AutotuningDump.h" -+#include "llvm/Transforms/Scalar/AutoTuningCompile.h" -+#endif -+ - using namespace llvm; - - static const Regex DefaultAliasRegex( -diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp -index 660cb2e974d7..8009e011833c 100644 ---- a/llvm/lib/Passes/PassBuilderPipelines.cpp -+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp -@@ -133,6 +133,11 @@ - #include "llvm/Transforms/Vectorize/SLPVectorizer.h" - #include "llvm/Transforms/Vectorize/VectorCombine.h" - -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/Transforms/Scalar/AutoTuningCompile.h" -+#endif -+ - using namespace llvm; - - static cl::opt UseInlineAdvisor( -@@ -289,6 +294,10 @@ PipelineTuningOptions::PipelineTuningOptions() { - EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; - } - -+#if defined(ENABLE_AUTOTUNER) -+extern cl::opt AutoTuningCompileMode; -+#endif -+ - namespace llvm { - extern cl::opt MaxDevirtIterations; - extern cl::opt EnableKnowledgeRetention; -@@ -452,9 +461,17 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, - // attention to it. - if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) -+#if defined(ENABLE_AUTOTUNER) -+ { -+ if (AutoTuningCompileMode) -+ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); -+#endif - LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), - /* OnlyWhenForced= */ !PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll)); -+#if defined(ENABLE_AUTOTUNER) -+ } -+#endif - - invokeLoopOptimizerEndEPCallbacks(LPM2, Level); - -@@ -631,9 +648,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - // attention to it. - if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) -+#if defined(ENABLE_AUTOTUNER) -+ { -+ if (AutoTuningCompileMode) -+ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); -+#endif - LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), - /* OnlyWhenForced= */ !PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll)); -+#if defined(ENABLE_AUTOTUNER) -+ } -+#endif - - invokeLoopOptimizerEndEPCallbacks(LPM2, Level); - -@@ -1110,6 +1135,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - if (EnableSyntheticCounts && !PGOOpt) - MPM.addPass(SyntheticCountsPropagation()); - -+#if defined(ENABLE_AUTOTUNER) -+ if (AutoTuningCompileMode) -+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionInline)); -+#endif -+ - if (EnableModuleInliner) - MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); - else -@@ -1131,6 +1161,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - /// TODO: Should LTO cause any differences to this set of passes? - void PassBuilder::addVectorPasses(OptimizationLevel Level, - FunctionPassManager &FPM, bool IsFullLTO) { -+#if defined(ENABLE_AUTOTUNER) -+ if (AutoTuningCompileMode && !IsFullLTO) -+ FPM.addPass( -+ AutoTuningCompileFunctionPass(autotuning::CompileOptionVectorize)); -+#endif -+ - FPM.addPass(LoopVectorizePass( - LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); - -@@ -1444,6 +1480,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - return buildO0DefaultPipeline(Level, LTOPreLink); - - ModulePassManager MPM; -+#if defined(ENABLE_AUTOTUNER) -+ if (AutoTuningCompileMode) -+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionStart)); -+#endif - - // Convert @llvm.global.annotations to !annotation metadata. - MPM.addPass(Annotation2MetadataPass()); -@@ -1475,6 +1515,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - - if (LTOPreLink) - addRequiredLTOPreLinkPasses(MPM); -+ -+#if defined(ENABLE_AUTOTUNER) -+ if (AutoTuningCompileMode) -+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionEnd)); -+#endif -+ - return MPM; - } - -diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def -index e10dc995c493..45a539f14b93 100644 ---- a/llvm/lib/Passes/PassRegistry.def -+++ b/llvm/lib/Passes/PassRegistry.def -@@ -29,6 +29,10 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) - MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) - MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) - -+#if defined(ENABLE_AUTOTUNER) -+MODULE_ANALYSIS("autotuning-dump", AutotuningDumpAnalysis()) -+#endif -+ - #ifndef MODULE_ALIAS_ANALYSIS - #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ - MODULE_ANALYSIS(NAME, CREATE_PASS) -@@ -127,6 +131,9 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass()) - MODULE_PASS("memprof-module", ModuleMemProfilerPass()) - MODULE_PASS("poison-checking", PoisonCheckingPass()) - MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) -+#if defined(ENABLE_AUTOTUNER) -+MODULE_PASS("autotuning-compile-module", AutoTuningCompileModulePass()) -+#endif - #undef MODULE_PASS - - #ifndef MODULE_PASS_WITH_PARAMS -@@ -430,6 +437,9 @@ FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) - FUNCTION_PASS("tsan", ThreadSanitizerPass()) - FUNCTION_PASS("memprof", MemProfilerPass()) - FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass()) -+#if defined(ENABLE_AUTOTUNER) -+FUNCTION_PASS("autotuning-compile-function", AutoTuningCompileFunctionPass()) -+#endif - #undef FUNCTION_PASS - - #ifndef FUNCTION_PASS_WITH_PARAMS -@@ -614,6 +624,9 @@ LOOP_PASS("guard-widening", GuardWideningPass()) - LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) - LOOP_PASS("loop-reroll", LoopRerollPass()) - LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) -+#if defined(ENABLE_AUTOTUNER) -+LOOP_PASS("autotuning-compile-loop", AutoTuningCompileLoopPass()) -+#endif - #undef LOOP_PASS - - #ifndef LOOP_PASS_WITH_PARAMS -diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp -index 7eef511928ec..8653027ceed2 100644 ---- a/llvm/lib/Passes/StandardInstrumentations.cpp -+++ b/llvm/lib/Passes/StandardInstrumentations.cpp -@@ -41,6 +41,10 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/Transforms/Scalar/AutoTuningCompile.h" -+#endif - - using namespace llvm; - -@@ -107,6 +111,10 @@ static cl::opt PrintOnCrash( - cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"), - cl::Hidden); - -+#if defined(ENABLE_AUTOTUNER) -+extern cl::opt AutoTuningCompileMode; -+#endif -+ - static cl::opt OptBisectPrintIRPath( - "opt-bisect-print-ir-path", - cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden); -@@ -874,6 +882,21 @@ bool OptPassGateInstrumentation::shouldRun(StringRef PassName, Any IR) { - - void OptPassGateInstrumentation::registerCallbacks( - PassInstrumentationCallbacks &PIC) { -+#if defined(ENABLE_AUTOTUNER) -+ // Using AutoTuner OptBisect to change the behavior of compilation pipeline. -+ // Flag 'opt-bisect-limit' will be preferred if both 'opt-bisect-limit' and -+ // incremental compilation flags are used. -+ if (autotuning::Engine.isParseInput() && AutoTuningCompileMode) { -+ if (!getAutoTuningOptPassGate().isEnabled()) -+ return; -+ -+ PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) { -+ return isIgnored(PassID) || -+ getAutoTuningOptPassGate().checkPass(PassID, getIRName(IR)); -+ }); -+ return; -+ } -+#endif - OptPassGate &PassGate = Context.getOptPassGate(); - if (!PassGate.isEnabled()) - return; -diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp -index b2627196bce6..b1dfa9d0f2cf 100644 ---- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp -+++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp -@@ -277,6 +277,14 @@ void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, - R.push_back(StrTab.add(Remark.RemarkName).first); - R.push_back(StrTab.add(Remark.PassName).first); - R.push_back(StrTab.add(Remark.FunctionName).first); -+#if defined(ENABLE_AUTOTUNER) -+ if (Remark.CodeRegionType) -+ R.push_back(StrTab.add(*Remark.CodeRegionType).first); -+ if (std::optional hash = Remark.CodeRegionHash) -+ R.push_back(*hash); -+ if (std::optional Invocation = Remark.Invocation) -+ R.push_back(*Invocation); -+#endif - Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); - - if (const std::optional &Loc = Remark.Loc) { -diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp -index 9f4676ce37ab..d1faf4f1553a 100644 ---- a/llvm/lib/Remarks/RemarkStreamer.cpp -+++ b/llvm/lib/Remarks/RemarkStreamer.cpp -@@ -14,6 +14,10 @@ - #include "llvm/Support/CommandLine.h" - #include - -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/IR/DebugInfoMetadata.h" -+#endif -+ - using namespace llvm; - using namespace llvm::remarks; - -diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp -index f5123b0f64ce..baa393c6a619 100644 ---- a/llvm/lib/Remarks/YAMLRemarkParser.cpp -+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp -@@ -17,10 +17,23 @@ - #include "llvm/Support/Endian.h" - #include "llvm/Support/Path.h" - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/Support/CommandLine.h" -+#endif - - using namespace llvm; - using namespace llvm::remarks; - -+#if defined(ENABLE_AUTOTUNER) -+// Creating code regions without meta data (e.g. debug Location, Function Name, -+// etc.). -+// This flag is added here instead of 'lib/AutoTuner/AutoTuning.cpp' to avoid -+// making LLVMRemarks dependent on LLVMCore. -+cl::opt OmitAutotuningMetadata( -+ "auto-tuning-omit-metadata", cl::Hidden, cl::init(false), -+ cl::desc("Include only code region hashes and types in opportunity files")); -+#endif -+ - char YAMLParseError::ID = 0; - - static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { -@@ -235,6 +248,23 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { - TheRemark.FunctionName = *MaybeStr; - else - return MaybeStr.takeError(); -+#if defined(ENABLE_AUTOTUNER) -+ } else if (KeyName == "CodeRegionType") { -+ if (Expected MaybeStr = parseStr(RemarkField)) -+ TheRemark.CodeRegionType = *MaybeStr; -+ else -+ return MaybeStr.takeError(); -+ } else if (KeyName == "CodeRegionHash") { -+ if (Expected MaybeULL = parseUnsignedLL(RemarkField)) -+ TheRemark.CodeRegionHash = *MaybeULL; -+ else -+ return MaybeULL.takeError(); -+ } else if (KeyName == "Invocation") { -+ if (Expected MaybeULL = parseUnsignedLL(RemarkField)) -+ TheRemark.Invocation = *MaybeULL; -+ else -+ return MaybeULL.takeError(); -+#endif - } else if (KeyName == "Hotness") { - if (Expected MaybeU = parseUnsigned(RemarkField)) - TheRemark.Hotness = *MaybeU; -@@ -261,11 +291,35 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { - } - } - -+#if defined(ENABLE_AUTOTUNER) -+ // Check if any of the mandatory fields are missing. -+ if (TheRemark.RemarkType == Type::AutoTuning) { -+ // We expect type, and pass to be present at least. -+ if (!TheRemark.CodeRegionType || TheRemark.PassName.empty()) -+ return error("CodeRegionHash, CodeRegionType, or Pass missing.", -+ *RemarkEntry.getRoot()); -+ -+ // Sanity check for the correct command line option. -+ if (!OmitAutotuningMetadata && TheRemark.RemarkName.empty()) -+ return error("Remark Name expected; enable -autotuning-omit-metadata.", -+ *RemarkEntry.getRoot()); -+ -+ if (!OmitAutotuningMetadata && TheRemark.FunctionName.empty()) -+ return error( -+ "Remark Function Name expected; enable -autotuning-omit-metadata.", -+ *RemarkEntry.getRoot()); -+ } else if (TheRemark.RemarkType == Type::Unknown || -+ TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || -+ TheRemark.FunctionName.empty()) -+ return error("Type, Pass, Name or Function missing.", -+ *RemarkEntry.getRoot()); -+#else - // Check if any of the mandatory fields are missing. - if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() || - TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty()) - return error("Type, Pass, Name or Function missing.", - *RemarkEntry.getRoot()); -+#endif - - return std::move(Result); - } -@@ -277,6 +331,9 @@ Expected YAMLRemarkParser::parseType(yaml::MappingNode &Node) { - .Case("!Analysis", remarks::Type::Analysis) - .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) - .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) -+#if defined(ENABLE_AUTOTUNER) -+ .Case("!AutoTuning", remarks::Type::AutoTuning) -+#endif - .Case("!Failure", remarks::Type::Failure) - .Default(remarks::Type::Unknown); - if (Type == remarks::Type::Unknown) -@@ -313,6 +370,31 @@ Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { - return Result; - } - -+#if defined(ENABLE_AUTOTUNER) -+Expected> -+YAMLRemarkParser::parseStrVector(yaml::KeyValueNode &Node) { -+ std::vector Result; -+ auto *SequenceNode = dyn_cast(Node.getValue()); -+ if (!SequenceNode) -+ return error("expected a value of sequence type.", Node); -+ -+ for (yaml::Node &Element : *SequenceNode) { -+ auto *ScalarNode = dyn_cast(&Element); -+ if (!ScalarNode) -+ return error("expected a value of scalar type.", Element); -+ else { -+ StringRef Str = ScalarNode->getRawValue(); -+ if (Str.front() == '\'') -+ Str = Str.drop_front(); -+ if (Str.back() == '\'') -+ Str = Str.drop_back(); -+ Result.push_back(Str); -+ } -+ } -+ return Result; -+} -+#endif -+ - Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { - SmallVector Tmp; - auto *Value = dyn_cast(Node.getValue()); -@@ -324,6 +406,19 @@ Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { - return UnsignedValue; - } - -+#if defined(ENABLE_AUTOTUNER) -+Expected YAMLRemarkParser::parseUnsignedLL(yaml::KeyValueNode &Node) { -+ SmallVector Tmp; -+ if (auto *Value = dyn_cast(Node.getValue())) { -+ uint64_t UnsignedValue = 0; -+ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue)) -+ return error("expected a value of integer type.", *Value); -+ return UnsignedValue; -+ } -+ return error("expected a value of scalar type.", Node); -+} -+#endif -+ - Expected - YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) { - auto *DebugLoc = dyn_cast(Node.getValue()); -@@ -374,6 +469,9 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { - - std::optional KeyStr; - std::optional ValueStr; -+#if defined(ENABLE_AUTOTUNER) -+ std::optional> ValueStrVector; -+#endif - std::optional Loc; - - for (yaml::KeyValueNode &ArgEntry : *ArgMap) { -@@ -400,11 +498,27 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { - if (ValueStr) - return error("only one string entry is allowed per argument.", ArgEntry); - -+#if defined(ENABLE_AUTOTUNER) -+ // Try to parse the value to a string vector. -+ if (Expected> MaybeStrVector = -+ parseStrVector(ArgEntry)) { -+ ValueStrVector = *MaybeStrVector; -+ ValueStr = ""; -+ } else { -+ consumeError(MaybeStrVector.takeError()); -+ // Try to parse the value. -+ if (Expected MaybeStr = parseStr(ArgEntry)) -+ ValueStr = *MaybeStr; -+ else -+ return MaybeStr.takeError(); -+ } -+#else - // Try to parse the value. - if (Expected MaybeStr = parseStr(ArgEntry)) - ValueStr = *MaybeStr; - else - return MaybeStr.takeError(); -+#endif - - // Keep the key from the string. - KeyStr = KeyName; -@@ -412,10 +526,18 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { - - if (!KeyStr) - return error("argument key is missing.", *ArgMap); -+#if defined(ENABLE_AUTOTUNER) -+ if (!ValueStr && !ValueStrVector) -+#else - if (!ValueStr) -+#endif - return error("argument value is missing.", *ArgMap); - -+#if defined(ENABLE_AUTOTUNER) -+ return Argument{*KeyStr, *ValueStr, ValueStrVector, Loc}; -+#else - return Argument{*KeyStr, *ValueStr, Loc}; -+#endif - } - - Expected> YAMLRemarkParser::next() { -diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h -index 8ef72e16be74..141f10dd3900 100644 ---- a/llvm/lib/Remarks/YAMLRemarkParser.h -+++ b/llvm/lib/Remarks/YAMLRemarkParser.h -@@ -91,6 +91,12 @@ protected: - Expected parseDebugLoc(yaml::KeyValueNode &Node); - /// Parse an argument. - Expected parseArg(yaml::Node &Node); -+#if defined(ENABLE_AUTOTUNER) -+ /// parse a vector of strings. -+ Expected> parseStrVector(yaml::KeyValueNode &Node); -+ /// Parse one value to an unsigned long long. -+ Expected parseUnsignedLL(yaml::KeyValueNode &Node); -+#endif - }; - - /// YAML with a string table to Remark parser. -diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp -index 68285c3dde1b..1bc0f23f9221 100644 ---- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp -+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp -@@ -15,10 +15,45 @@ - #include "llvm/Remarks/Remark.h" - #include "llvm/Support/FileSystem.h" - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/Support/CommandLine.h" -+#endif - - using namespace llvm; - using namespace llvm::remarks; - -+#if defined(ENABLE_AUTOTUNER) -+extern cl::opt OmitAutotuningMetadata; -+ -+// Use the same keys whether we use a string table or not (respectively, T is an -+// unsigned or a StringRef). -+template -+static void mapRemarkHeader( -+ yaml::IO &io, T PassName, T RemarkName, std::optional RL, -+ T FunctionName, std::optional CodeRegionType, -+ std::optional CodeRegionHash, -+ std::optional Invocation, -+ std::optional> BaselineConfig, -+ std::optional>> -+ AutoTunerOptions, -+ std::optional Hotness, ArrayRef Args) { -+ io.mapRequired("Pass", PassName); -+ if (!OmitAutotuningMetadata) { -+ io.mapRequired("Name", RemarkName); -+ io.mapOptional("DebugLoc", RL); -+ io.mapRequired("Function", FunctionName); -+ } -+ io.mapOptional("CodeRegionType", CodeRegionType); -+ io.mapOptional("CodeRegionHash", CodeRegionHash); -+ io.mapOptional("DynamicConfigs", AutoTunerOptions); -+ io.mapOptional("BaselineConfig", BaselineConfig); -+ io.mapOptional("Invocation", Invocation); -+ if (!OmitAutotuningMetadata) { -+ io.mapOptional("Hotness", Hotness); -+ io.mapOptional("Args", Args); -+ } -+} -+#else - // Use the same keys whether we use a string table or not (respectively, T is an - // unsigned or a StringRef). - template -@@ -33,6 +68,7 @@ static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, - io.mapOptional("Hotness", Hotness); - io.mapOptional("Args", Args); - } -+#endif - - namespace llvm { - namespace yaml { -@@ -53,6 +89,10 @@ template <> struct MappingTraits { - else if (io.mapTag("!AnalysisAliasing", - (Remark->RemarkType == Type::AnalysisAliasing))) - ; -+#if defined(ENABLE_AUTOTUNER) -+ else if (io.mapTag("!AutoTuning", (Remark->RemarkType == Type::AutoTuning))) -+ ; -+#endif - else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure))) - ; - else -@@ -66,14 +106,58 @@ template <> struct MappingTraits { - unsigned NameID = StrTab.add(Remark->RemarkName).first; - unsigned FunctionID = StrTab.add(Remark->FunctionName).first; - mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, -+#if defined(ENABLE_AUTOTUNER) -+ Remark->CodeRegionType, Remark->CodeRegionHash, -+ Remark->Invocation, Remark->BaselineConfig, -+ Remark->AutoTunerOptions, Remark->Hotness, Remark->Args); -+ -+#else - Remark->Hotness, Remark->Args); -+#endif - } else { - mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, -+#if defined(ENABLE_AUTOTUNER) -+ Remark->FunctionName, Remark->CodeRegionType, -+ Remark->CodeRegionHash, Remark->Invocation, -+ Remark->BaselineConfig, Remark->AutoTunerOptions, -+ Remark->Hotness, Remark->Args); -+#else - Remark->FunctionName, Remark->Hotness, Remark->Args); -+#endif - } - } - }; - -+#if defined(ENABLE_AUTOTUNER) -+// YAML I/O to support dumping 'Values: { key: [...], ... }' in opportunity -+// files. -+template <> -+struct MappingTraits>> { -+ static void mapping(IO &io, -+ std::map> &OM) { -+ assert(io.outputting() && "input not yet implemented"); -+ -+ // Print as an abbreviated dictionary -+ llvm::yaml::StdMapStringCustomMappingTraitsImpl< -+ std::vector>::output(io, OM); -+ } -+ // This sets the beginFlowMapping and endFlowMapping -+ static const bool flow = true; -+}; -+ -+template <> struct MappingTraits> { -+ static void mapping(IO &io, std::map &OM) { -+ assert(io.outputting() && "input not yet implemented"); -+ -+ // Print as an abbreviated dictionary -+ llvm::yaml::StdMapStringCustomMappingTraitsImpl::output(io, -+ OM); -+ } -+ // This sets the beginFlowMapping and endFlowMapping -+ static const bool flow = true; -+}; -+#endif -+ - template <> struct MappingTraits { - static void mapping(IO &io, RemarkLocation &RL) { - assert(io.outputting() && "input not yet implemented"); -diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp -index d3efb8b67be5..b66415c0e9a9 100644 ---- a/llvm/lib/Support/CommandLine.cpp -+++ b/llvm/lib/Support/CommandLine.cpp -@@ -127,6 +127,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) { - O->getFormattingFlag() == cl::AlwaysPrefix; - } - -+#if defined(ENABLE_AUTOTUNER) -+#include -+#endif - - namespace { - -@@ -1470,6 +1473,44 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv, - Errs, LongOptionsUseDoubleDash); - } - -+#if defined(ENABLE_AUTOTUNER) -+bool cl::ParseAutoTunerOptions( -+ std::unordered_map LLVMParams, -+ std::unordered_map ProgramParams, -+ StringRef Overview, raw_ostream *Errs, const char *EnvVar, -+ bool LongOptionsUseDoubleDash) { -+ SmallVector NewArgv; -+ BumpPtrAllocator A; -+ StringSaver Saver(A); -+ // GlobalParser requires arguments similar to C style command line options -+ // (int argc, char * argv[]) where argv[0] refers to the program name. -+ // We are using a fake program name here which is consistent with LLVM. -+ NewArgv.push_back("AutoTuner (LLVM option parsing)"); -+ -+ for (const auto &I : LLVMParams) { -+ std::string NewOption = I.first + "=" + I.second; -+ NewArgv.push_back(Saver.save(NewOption).data()); -+ } -+ -+ for (const auto &I : ProgramParams) { -+ std::string NewOption = I.first + "=" + I.second; -+ NewArgv.push_back(Saver.save(NewOption).data()); -+ } -+ -+ // Parse options from environment variable. -+ if (EnvVar) { -+ if (std::optional EnvValue = -+ sys::Process::GetEnv(StringRef(EnvVar))) -+ TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv); -+ } -+ -+ int NewArgc = static_cast(NewArgv.size()); -+ // Parse all options. -+ return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview, -+ Errs, LongOptionsUseDoubleDash); -+} -+#endif -+ - /// Reset all options at least once, so that we can parse different options. - void CommandLineParser::ResetAllOptionOccurrences() { - // Reset all option values to look like they have never been seen before. -diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt -index 034f1587ae8d..3507d357a4c6 100644 ---- a/llvm/lib/Transforms/IPO/CMakeLists.txt -+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt -@@ -57,6 +57,7 @@ add_llvm_component_library(LLVMipo - LINK_COMPONENTS - AggressiveInstCombine - Analysis -+ AutoTuner - BitReader - BitWriter - Core -diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp -index 3e00aebce372..802667819c44 100644 ---- a/llvm/lib/Transforms/IPO/Inliner.cpp -+++ b/llvm/lib/Transforms/IPO/Inliner.cpp -@@ -64,6 +64,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - using namespace llvm; - -@@ -298,6 +301,27 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, - // be deleted as a batch after inlining. - SmallVector DeadFunctionsInComdats; - -+#if defined(ENABLE_AUTOTUNER) -+ bool IsAutoTunerEnabled = -+ autotuning::Engine.isEnabled() && -+ autotuning::Engine.isTuningAllowedForType(autotuning::CallSite); -+ if (IsAutoTunerEnabled) { -+ SmallVector, 16> CallsCopy = Calls; -+ for (int I = 0; I < (int)CallsCopy.size(); ++I) { -+ CallBase &CB = *CallsCopy[I].first; -+ DebugLoc DLoc = CB.getDebugLoc(); -+ if (!CB.getCaller() || !CB.getCalledFunction() || !DLoc) -+ continue; -+ autotuning::CallSiteLocation Loc = autotuning::CallSiteLocation{ -+ &CB, CB.getCaller(), CB.getCalledFunction(), -+ autotuning::SourceLocation{DLoc->getFilename().str(), DLoc->getLine(), -+ DLoc->getColumn()}}; -+ autotuning::Engine.insertCallSiteLoc(Loc); -+ } -+ autotuning::Engine.cleanCallSiteLoc(); -+ } -+#endif -+ - // Loop forward over all of the calls. Note that we cannot cache the size as - // inlining can introduce new calls that need to be processed. - for (int I = 0; I < (int)Calls.size(); ++I) { -@@ -412,6 +436,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, - if (NewCallee) { - if (!NewCallee->isDeclaration()) { - Calls.push_back({ICB, NewHistoryID}); -+#if defined(ENABLE_AUTOTUNER) -+ if (IsAutoTunerEnabled) -+ if (ICB->getDebugLoc()) -+ autotuning::Engine.updateCallSiteLocs( -+ CB, ICB, ICB->getCalledFunction(), -+ ICB->getDebugLoc()->getLine()); -+#endif - // Continually inlining through an SCC can result in huge compile - // times and bloated code since we arbitrarily stop at some point - // when the inliner decides it's not profitable to inline anymore. -@@ -527,6 +558,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, - FAM.invalidate(F, PreservedAnalyses::none()); - } - -+#if defined(ENABLE_AUTOTUNER) -+ if (IsAutoTunerEnabled) -+ autotuning::Engine.clearCallSiteLocs(); -+#endif -+ - // We must ensure that we only delete functions with comdats if every function - // in the comdat is going to be deleted. - if (!DeadFunctionsInComdats.empty()) { -diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp -index a53baecd4776..9590cf625c64 100644 ---- a/llvm/lib/Transforms/IPO/SampleProfile.cpp -+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp -@@ -1212,6 +1212,20 @@ bool SampleProfileLoader::inlineHotFunctions( - } - } - } -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled()) { -+ // If a callsite is hot/cold, mark its corresponding callee as -+ // hot/cold respectively so that auto-tuning engine will be able to -+ // selectively dump code regions as tuning opportunities. -+ if (const CallInst *CI = dyn_cast(&I)) -+ if (Function *Callee = CI->getCalledFunction()) { -+ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) -+ Callee->getATEFunction().setHot(); -+ else -+ Callee->getATEFunction().setCold(); -+ } -+ } -+#endif - } - if (Hot || ExternalInlineAdvisor) { - CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); -diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt -index 424f1d433606..955353944b14 100644 ---- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt -+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt -@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMInstrumentation - - LINK_COMPONENTS - Analysis -+ AutoTuner - Core - Demangle - MC -diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp -index 3c8f25d73c62..b9459b59e704 100644 ---- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp -+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp -@@ -2132,6 +2132,10 @@ static bool annotateAllFunctions( - F->addFnAttr(Attribute::InlineHint); - LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() - << "\n"); -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled()) -+ F->getATEFunction().setHot(); -+#endif - } - for (auto &F : ColdFunctions) { - // Only set when there is no Attribute::Hot set by the user. For Hot -@@ -2148,6 +2152,10 @@ static bool annotateAllFunctions( - F->addFnAttr(Attribute::Cold); - LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() - << "\n"); -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled()) -+ F->getATEFunction().setCold(); -+#endif - } - return true; - } -diff --git a/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp -new file mode 100644 -index 000000000000..c33cb7cfc256 ---- /dev/null -+++ b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp -@@ -0,0 +1,334 @@ -+#if defined(ENABLE_AUTOTUNER) -+//===--------------- AutoTuningCompile.cpp - Auto-Tuning ------------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. -+// -+//===----------------------------------------------------------------------===// -+// -+/// \file -+/// This pass implements incremental compilation for AutoTuner to reduce the -+/// compilation time for tuning process. -+/// This pass performs 2 operations. -+/// 1. Writing module level IR files which can be used in subsequent -+/// compilations for AutoTuner flow. So clang frontend don't have to process -+/// the source code from scratch. -+/// 2. Add/Remove attributes for modules and functions to enable/disable -+/// execution of optimization pass(es). It further reduces the compilation -+/// time by skipping optimization pass(es) (If feasible). -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/Transforms/Scalar/AutoTuningCompile.h" -+#include "llvm/Analysis/AutotuningDump.h" -+#include "llvm/AutoTuner/AutoTuning.h" -+#include "llvm/InitializePasses.h" -+#include "llvm/Support/CommandLine.h" -+#include "llvm/Transforms/Scalar.h" -+#include -+ -+// Enable debug messages for AutoTuning Compilation. -+#define DEBUG_TYPE "autotuning-compile" -+ -+using namespace llvm; -+ -+extern cl::opt AutoTuningCompileMode; -+ -+AutoTuningOptPassGate SkipPasses = AutoTuningOptPassGate(true); -+AutoTuningOptPassGate RunPasses = AutoTuningOptPassGate(false); -+bool AutoTuningCompileModule::SkipCompilation = false; -+ -+static void writeFiles(Module &M, std::string Pass) { -+ if (autotuning::Engine.isGenerateOutput()) { -+ switch (AutoTuningCompileMode) { -+ case Basic: -+ case CoarseGrain: -+ if (Pass == autotuning::CompileOptionStart) { -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " -+ << Pass << ".\n"); -+ auto ATD = new AutotuningDumpLegacy(/* Incremental Compilation */ true); -+ ATD->runOnModule(M); -+ } -+ break; -+ case FineGrain: -+ if (autotuning::Engine.hasOpportunities()) { -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " -+ << Pass << ".\n"); -+ auto ATD = new AutotuningDumpLegacy(/* Incremental Compilation */ true); -+ ATD->runOnModule(M); -+ } -+ break; -+ default: -+ llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " -+ "Compilation mode.\n"); -+ } -+ } -+} -+ -+bool AutoTuningOptPassGate::shouldRunPass(const StringRef PassName, -+ StringRef IRDescription) { -+ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName -+ << "': " << (Skip ? "True" : "False") << '\n'); -+ return !Skip; -+} -+ -+bool AutoTuningOptPassGate::checkPass(const StringRef PassName, -+ const StringRef TargetDesc) { -+ if (PassName.startswith("AutoTuningCompile")) { -+ LLVM_DEBUG(dbgs() << "Running '" << PassName << "'pass.\n"); -+ return true; -+ } -+ -+ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName -+ << "': " << (Skip ? "True" : "False") << '\n'); -+ return !Skip; -+} -+ -+AutoTuningCompileModule::AutoTuningCompileModule(std::string Pass) { -+ this->Pass = Pass; -+} -+ -+void AutoTuningCompileModule::writeIRFiles(Module &M) const { -+ writeFiles(M, Pass); -+} -+ -+bool AutoTuningCompileModule::modifyCompilationPipeline(Module &M) const { -+ bool Changed = false; -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable " -+ "optimization of module/functions. Pass: " -+ << Pass << '\n'); -+ -+ StringRef Filename = M.getName(); -+ size_t Pos = Filename.rfind(".ll"); -+ if (Pos == StringRef::npos) { -+ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. " -+ "Disabling incremental compilation.\n"; -+ AutoTuningCompileMode = Inactive; -+ return Changed; -+ } -+ Filename = Filename.substr(0, Pos); -+ -+ switch (AutoTuningCompileMode) { -+ case Basic: -+ case CoarseGrain: -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " -+ "Basic/CoarseGrain incremental compilation mode.\n"); -+ break; -+ case FineGrain: { -+ if (Pass == autotuning::CompileOptionStart) { -+ M.getContext().setOptPassGate(SkipPasses); -+ getAutoTuningOptPassGate().setSkip(true); -+ setSkipCompilation(true); -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); -+ } else if (getSkipCompilation() && -+ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || -+ Pass == "end")) { -+ M.getContext().setOptPassGate(RunPasses); -+ getAutoTuningOptPassGate().setSkip(false); -+ setSkipCompilation(false); -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); -+ } else -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " -+ << (getSkipCompilation() ? "True" : "False") -+ << " ) continued.\n"); -+ -+ Changed = true; -+ break; -+ } -+ default: -+ llvm_unreachable( -+ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); -+ } -+ -+ return Changed; -+} -+ -+bool AutoTuningCompileModule::run(Module &M) { -+ bool Changed = false; -+ if (AutoTuningCompileMode == Inactive) -+ return Changed; -+ -+ if (!autotuning::Engine.isEnabled()) { -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); -+ return Changed; -+ } -+ -+ writeIRFiles(M); -+ -+ if (autotuning::Engine.isParseInput()) -+ Changed |= modifyCompilationPipeline(M); -+ -+ return Changed; -+} -+ -+AutoTuningCompileModuleLegacy::AutoTuningCompileModuleLegacy(std::string Pass) -+ : ModulePass(AutoTuningCompileModuleLegacy::ID) { -+ this->Pass = Pass; -+} -+ -+bool AutoTuningCompileModuleLegacy::runOnModule(Module &M) { -+ AutoTuningCompileModule Impl(Pass); -+ return Impl.run(M); -+} -+ -+char AutoTuningCompileModuleLegacy::ID = 0; -+ -+StringRef AutoTuningCompileModuleLegacy::getPassName() const { -+ return "AutoTuner Incremental Compilation"; -+} -+ -+INITIALIZE_PASS(AutoTuningCompileModuleLegacy, "autotuning-compile-module", -+ "AutoTuner Incremental Compilation", false, false) -+ -+// Public interface to the AutoTuningCompile pass -+ModulePass *llvm::createAutoTuningCompileModuleLegacyPass(std::string Pass) { -+ return new AutoTuningCompileModuleLegacy(Pass); -+} -+ -+PreservedAnalyses AutoTuningCompileModulePass::run(Module &M, -+ ModuleAnalysisManager &) { -+ AutoTuningCompileModule Impl(Pass); -+ Impl.run(M); -+ return PreservedAnalyses::all(); -+} -+ -+AutoTuningCompileFunction::AutoTuningCompileFunction(std::string Pass) { -+ this->Pass = Pass; -+} -+ -+void AutoTuningCompileFunction::writeIRFiles(Module &M) { -+ if (IsModuleWritten) -+ return; -+ IsModuleWritten = true; -+ writeFiles(M, Pass); -+} -+ -+bool AutoTuningCompileFunction::modifyCompilationPipeline(Function &F) { -+ bool Changed = false; -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable " -+ "optimization of module/functions. Pass: " -+ << Pass << '\n'); -+ Module *M = F.getParent(); -+ StringRef Filename = M->getName(); -+ size_t Pos = Filename.rfind(".ll"); -+ if (Pos == StringRef::npos) { -+ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. " -+ "Disabling incremental compilation.\n"; -+ AutoTuningCompileMode = Inactive; -+ return Changed; -+ } -+ Filename = Filename.substr(0, Pos); -+ -+ switch (AutoTuningCompileMode) { -+ case Basic: -+ case CoarseGrain: -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " -+ "Basic/CoarseGrain incremental compilation mode.\n"); -+ break; -+ case FineGrain: { -+ if (!AutoTuningCompileModule::getSkipCompilation() && -+ Pass == autotuning::CompileOptionStart) { -+ if (!SkipDecision) { -+ M->getContext().setOptPassGate(SkipPasses); -+ getAutoTuningOptPassGate().setSkip(true); -+ SkipDecision = true; -+ } -+ AutoTuningCompileModule::setSkipCompilation(true); -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); -+ } else if (AutoTuningCompileModule::getSkipCompilation() && -+ Pass != autotuning::CompileOptionStart && -+ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || -+ Pass == autotuning::CompileOptionEnd)) { -+ M->getContext().setOptPassGate(RunPasses); -+ getAutoTuningOptPassGate().setSkip(false); -+ SkipDecision = false; -+ AutoTuningCompileModule::setSkipCompilation(false); -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); -+ } else -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " -+ << (AutoTuningCompileModule::getSkipCompilation() -+ ? "True" -+ : "False") -+ << " ) continued.\n"); -+ -+ Changed = true; -+ break; -+ } -+ default: -+ llvm_unreachable( -+ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); -+ } -+ -+ return Changed; -+} -+ -+bool AutoTuningCompileFunction::run(Function &F) { -+ bool Changed = false; -+ if (AutoTuningCompileMode == Inactive) -+ return Changed; -+ -+ if (!autotuning::Engine.isEnabled()) { -+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); -+ return Changed; -+ } -+ -+ writeIRFiles(*F.getParent()); -+ -+ if (autotuning::Engine.isParseInput()) -+ Changed |= modifyCompilationPipeline(F); -+ -+ return Changed; -+} -+ -+AutoTuningCompileFunctionLegacy::AutoTuningCompileFunctionLegacy( -+ std::string Pass) -+ : FunctionPass(AutoTuningCompileFunctionLegacy::ID) { -+ this->Pass = Pass; -+} -+ -+bool AutoTuningCompileFunctionLegacy::runOnFunction(Function &F) { -+ AutoTuningCompileFunction Impl(Pass); -+ return Impl.run(F); -+} -+ -+char AutoTuningCompileFunctionLegacy::ID = 0; -+ -+StringRef AutoTuningCompileFunctionLegacy::getPassName() const { -+ return "AutoTuner Incremental Compilation"; -+} -+ -+INITIALIZE_PASS(AutoTuningCompileFunctionLegacy, "autotuning-compile-function", -+ "AutoTuner Incremental Compilation", false, false) -+ -+// Public interface to the AutoTuningCompile pass -+FunctionPass * -+llvm::createAutoTuningCompileFunctionLegacyPass(std::string Pass) { -+ return new AutoTuningCompileFunctionLegacy(Pass); -+} -+ -+PreservedAnalyses -+AutoTuningCompileFunctionPass::run(Function &F, FunctionAnalysisManager &AM) { -+ AutoTuningCompileFunction Impl(Pass); -+ Impl.run(F); -+ return PreservedAnalyses::all(); -+} -+ -+PreservedAnalyses -+AutoTuningCompileLoopPass::run(Loop &L, LoopAnalysisManager &AM, -+ LoopStandardAnalysisResults &AR, LPMUpdater &U) { -+ AutoTuningCompileFunction Impl(Pass); -+ Function *F = L.getHeader()->getParent(); -+ Impl.run(*F); -+ return PreservedAnalyses::all(); -+} -+ -+AutoTuningOptPassGate &llvm::getAutoTuningOptPassGate() { -+ static AutoTuningOptPassGate AutoTuningGate; -+ return AutoTuningGate; -+} -+ -+#endif -diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt -index eb008c15903a..e5a82ea8f923 100644 ---- a/llvm/lib/Transforms/Scalar/CMakeLists.txt -+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt -@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts - ADCE.cpp - AlignmentFromAssumptions.cpp - AnnotationRemarks.cpp -+ AutoTuningCompile.cpp - BDCE.cpp - CallSiteSplitting.cpp - ConstantHoisting.cpp -@@ -92,6 +93,7 @@ add_llvm_component_library(LLVMScalarOpts - LINK_COMPONENTS - AggressiveInstCombine - Analysis -+ AutoTuner - Core - InstCombine - Support -diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp -index 335b489d3cb2..feb8932eaae7 100644 ---- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp -+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp -@@ -66,6 +66,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - using namespace llvm; - -@@ -173,6 +176,10 @@ static cl::opt - cl::desc("Default threshold (max size of unrolled " - "loop), used in all but O3 optimizations")); - -+#if defined(ENABLE_AUTOTUNER) -+static const std::string UnrollCountParamStr = "UnrollCount"; -+#endif -+ - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. -@@ -893,7 +900,12 @@ bool llvm::computeUnrollCount( - OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, - bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, -+#if defined(ENABLE_AUTOTUNER) -+ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound, -+ unsigned int Invocation) { -+#else - TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { -+#endif - - UnrollCostEstimator UCE(*L, LoopSize); - -@@ -942,6 +954,43 @@ bool llvm::computeUnrollCount( - } - } - -+#if defined(ENABLE_AUTOTUNER) -+ // Priority 2.5 is using Unroll Count set by AutoTuner (if enabled). -+ if (autotuning::Engine.isEnabled()) { -+ // Create a code region for current loop. This code region will be added to -+ // opportunity list once all the relevant information is gathered. -+ autotuning::Engine.initContainer(L, DEBUG_TYPE, -+ L->getHeader()->getParent()->getName(), -+ /* addOpportunity */ false, Invocation); -+ -+ int NewValue = 0; // the int value is set by lookUpParams() -+ bool UnrollCountChanged = L->lookUpParams("UnrollCount", NewValue); -+ -+ if (UnrollCountChanged) { -+ // Setting the UP.Count with the value suggested by AutoTuner. -+ // AutoTuner will use UnrollCount = 0, 1, X, Y, Z in case of dynamic -+ // configuration and UnrollCount = 0, 1, 2, 4, 8 otherwise to find -+ // optimal configuration. Compiler will unroll the loop with suggested -+ // UnrollCount except when UnrollCount = 1 where AutoTuner is suggesting -+ // to try loop peeling. -+ UP.Count = NewValue; -+ UP.AllowExpensiveTripCount = true; -+ UP.Force = true; -+ UP.Runtime = true; -+ if (!UP.AllowRemainder && UP.Count != 1) -+ UP.Count = 0; -+ -+ // Check for Loop Peeling -+ if (UP.Count == 1) { -+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); -+ UP.Runtime = (PP.PeelCount) ? false : UP.Runtime; -+ } -+ -+ return true; -+ } -+ } -+#endif -+ - // 3rd priority is exact full unrolling. This will eliminate all copies - // of some exit test. - UP.Count = 0; -@@ -1119,6 +1168,59 @@ bool llvm::computeUnrollCount( - return ExplicitUnroll; - } - -+#if defined(ENABLE_AUTOTUNER) -+// Given UnrollingPreferences count (UPCount) and TripCount for CodeRegion -+// CR, compute the dynamic Unroll values for tuning and add it to CR. -+static void -+computeAutoTunerDynamicUnrollOptions(unsigned UPCount, unsigned TripCount, -+ const autotuning::CodeRegion &CR) { -+ std::vector DynamicTuningOptions; -+ unsigned int PotentialTuningOptions[2]; -+ unsigned int Idx = 0; -+ int Count = -1; -+ unsigned int CurrentOption = 2; -+ unsigned int MaxTuningCount = 64; -+ DynamicTuningOptions.push_back(0); -+ // Add LoopPeeling as an additional option. -+ DynamicTuningOptions.push_back(1); -+ if (!UPCount) { -+ TripCount = (TripCount > MaxTuningCount) ? MaxTuningCount : TripCount; -+ unsigned int Limit = (TripCount == 0) ? 8 : TripCount; -+ DynamicTuningOptions.push_back(TripCount ? TripCount : 8); -+ while (CurrentOption < Limit) { -+ PotentialTuningOptions[Idx] = CurrentOption; -+ CurrentOption *= 2; -+ Idx = (Idx + 1) % 2; -+ ++Count; -+ } -+ } else { -+ while (CurrentOption < UPCount) { -+ PotentialTuningOptions[Idx] = CurrentOption; -+ CurrentOption *= 2; -+ Idx = (Idx + 1) % 2; -+ ++Count; -+ } -+ if (TripCount != UPCount) { -+ if (CurrentOption == UPCount) { -+ CurrentOption *= 2; -+ } -+ if (!TripCount || CurrentOption < TripCount) { -+ PotentialTuningOptions[Idx] = CurrentOption; -+ ++Count; -+ } -+ } -+ if (UPCount != 1) -+ DynamicTuningOptions.push_back(UPCount); -+ } -+ -+ Count = std::min(1, Count); -+ while (Count >= 0) -+ DynamicTuningOptions.push_back(PotentialTuningOptions[Count--]); -+ -+ CR.addAutoTunerOptions("UnrollCount", DynamicTuningOptions); -+} -+#endif -+ - static LoopUnrollResult - tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - const TargetTransformInfo &TTI, AssumptionCache &AC, -@@ -1132,7 +1234,12 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - std::optional ProvidedUpperBound, - std::optional ProvidedAllowPeeling, - std::optional ProvidedAllowProfileBasedPeeling, -+#if defined(ENABLE_AUTOTUNER) -+ std::optional ProvidedFullUnrollMaxCount, -+ unsigned int Invocation = 0) { -+#else - std::optional ProvidedFullUnrollMaxCount) { -+#endif - - LLVM_DEBUG(dbgs() << "Loop Unroll: F[" - << L->getHeader()->getParent()->getName() << "] Loop %" -@@ -1276,11 +1383,28 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - // computeUnrollCount() decides whether it is beneficial to use upper bound to - // fully unroll the loop. - bool UseUpperBound = false; -+ -+#if defined(ENABLE_AUTOTUNER) -+ bool IsCountSetExplicitly = computeUnrollCount( -+ L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, -+ MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound, Invocation); -+ const autotuning::CodeRegion CR = L->getCodeRegion(); -+ // computeAutoTunerDynamicUnrollOptions() adds the dynamic Unroll values to -+ // the CodeRegion. -+ computeAutoTunerDynamicUnrollOptions(UP.Count, TripCount, CR); -+ -+ if (!UP.Count) { -+ autotuning::Engine.addOpportunity( -+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); -+ return LoopUnrollResult::Unmodified; -+ } -+#else - bool IsCountSetExplicitly = computeUnrollCount( - L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, - TripMultiple, LoopSize, UP, PP, UseUpperBound); - if (!UP.Count) - return LoopUnrollResult::Unmodified; -+#endif - - if (PP.PeelCount) { - assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); -@@ -1300,8 +1424,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - // we had, so we don't want to unroll or peel again. - if (PP.PeelProfiledIterations) - L->setLoopAlreadyUnrolled(); -+#if defined(ENABLE_AUTOTUNER) -+ autotuning::Engine.addOpportunity( -+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); -+ return LoopUnrollResult::PartiallyUnrolled; -+ } -+ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); -+#else - return LoopUnrollResult::PartiallyUnrolled; - } -+#endif - return LoopUnrollResult::Unmodified; - } - -@@ -1329,8 +1461,18 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - UP.UnrollRemainder, ForgetAllSCEV}, - LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); -+ -+#if defined(ENABLE_AUTOTUNER) -+ if (UnrollResult == LoopUnrollResult::Unmodified) { -+ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); -+ return LoopUnrollResult::Unmodified; -+ } -+ autotuning::Engine.addOpportunity( -+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); -+#else - if (UnrollResult == LoopUnrollResult::Unmodified) - return LoopUnrollResult::Unmodified; -+#endif - - if (RemainderLoop) { - std::optional RemainderLoopID = -@@ -1379,6 +1521,20 @@ public: - /// Otherwise, forgetAllLoops and rebuild when needed next. - bool ForgetAllSCEV; - -+#if defined(ENABLE_AUTOTUNER) -+private: -+ // 'InvocationCounter' keeps track of Invocation of Loop Unroll Pass and -+ // assign it to 'Invocation'. So each LoopUnroll Object knows when it is -+ // being invoked during optimization pipeline. It is used to identify the -+ // Invocation of a pass if it is invoked multiple times. AutoTuner will use -+ // this information to generate the Code Regions and apply the suggested -+ // configuration during the correct invocation of the Loop Unroll Pass. -+ static unsigned int InvocationCounter; -+ unsigned int Invocation; -+ -+public: -+#endif -+ - std::optional ProvidedCount; - std::optional ProvidedThreshold; - std::optional ProvidedAllowPartial; -@@ -1405,6 +1561,9 @@ public: - ProvidedAllowPeeling(AllowPeeling), - ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), - ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { -+#if defined(ENABLE_AUTOTUNER) -+ Invocation = InvocationCounter++; -+#endif - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } - -@@ -1431,7 +1590,12 @@ public: - /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, - ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, - ProvidedUpperBound, ProvidedAllowPeeling, -+#if defined(ENABLE_AUTOTUNER) -+ ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount, -+ Invocation); -+#else - ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); -+#endif - - if (Result == LoopUnrollResult::FullyUnrolled) - LPM.markLoopAsDeleted(*L); -@@ -1449,6 +1613,9 @@ public: - getLoopAnalysisUsage(AU); - } - }; -+#if defined(ENABLE_AUTOTUNER) -+unsigned int LoopUnroll::InvocationCounter = 0; -+#endif - - } // end anonymous namespace - -@@ -1496,6 +1663,11 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, - - std::string LoopName = std::string(L.getName()); - -+#if defined(ENABLE_AUTOTUNER) -+ // LoopFullUnrollPass will be invoked first during optimization pipeline. -+ unsigned int Invocation = 0; -+#endif -+ - bool Changed = - tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, - /*BFI*/ nullptr, /*PSI*/ nullptr, -@@ -1505,7 +1677,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, - /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ true, - /*AllowProfileBasedPeeling*/ false, -+#if defined(ENABLE_AUTOTUNER) -+ /*FullUnrollMaxCount*/ std::nullopt, -+ /*Invocation*/ Invocation) != -+#else - /*FullUnrollMaxCount*/ std::nullopt) != -+#endif - LoopUnrollResult::Unmodified; - if (!Changed) - return PreservedAnalyses::all(); -@@ -1588,6 +1765,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, - - bool Changed = false; - -+#if defined(ENABLE_AUTOTUNER) -+ // LoopUnrollPass will be invoked second during optimization pipeline. -+ unsigned int Invocation = 1; -+#endif -+ - // The unroller requires loops to be in simplified form, and also needs LCSSA. - // Since simplification may add new inner loops, it has to run before the - // legality and profitability checks. This means running the loop unroller -@@ -1630,7 +1812,12 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, - /*Count*/ std::nullopt, - /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, - UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, -+#if defined(ENABLE_AUTOTUNER) -+ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, -+ Invocation); -+#else - UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); -+#endif - Changed |= Result != LoopUnrollResult::Unmodified; - - // The parent must not be damaged by unrolling! -diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp -index 37b032e4d7c7..4b140e8d600b 100644 ---- a/llvm/lib/Transforms/Scalar/Scalar.cpp -+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp -@@ -64,4 +64,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { - initializeStraightLineStrengthReduceLegacyPassPass(Registry); - initializePlaceBackedgeSafepointsLegacyPassPass(Registry); - initializeLoopSimplifyCFGLegacyPassPass(Registry); -+#if defined(ENABLE_AUTOTUNER) -+ initializeAutoTuningCompileFunctionLegacyPass(Registry); -+ initializeAutoTuningCompileModuleLegacyPass(Registry); -+#endif - } -diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp -index 8b99f73b850b..b3c60686e252 100644 ---- a/llvm/lib/Transforms/Scalar/Sink.cpp -+++ b/llvm/lib/Transforms/Scalar/Sink.cpp -@@ -248,6 +248,11 @@ namespace { - } - - bool runOnFunction(Function &F) override { -+#if defined(ENABLE_AUTOTUNER) -+ if (skipFunction(F)) -+ return false; -+#endif -+ - auto &DT = getAnalysis().getDomTree(); - auto &LI = getAnalysis().getLoopInfo(); - auto &AA = getAnalysis().getAAResults(); -diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt -index a870071f3f64..8616e7b923c0 100644 ---- a/llvm/lib/Transforms/Utils/CMakeLists.txt -+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt -@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMTransformUtils - - LINK_COMPONENTS - Analysis -+ AutoTuner - Core - Support - TargetParser -diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp -index c36b0533580b..20a4edcb29db 100644 ---- a/llvm/lib/Transforms/Utils/LCSSA.cpp -+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp -@@ -491,6 +491,11 @@ char &llvm::LCSSAID = LCSSAWrapperPass::ID; - - /// Transform \p F into loop-closed SSA form. - bool LCSSAWrapperPass::runOnFunction(Function &F) { -+#if defined(ENABLE_AUTOTUNER) -+ if (skipFunction(F)) -+ return false; -+#endif -+ - LI = &getAnalysis().getLoopInfo(); - DT = &getAnalysis().getDomTree(); - auto *SEWP = getAnalysisIfAvailable(); -diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp -index 3e604fdf2e11..2e42e7f1397f 100644 ---- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp -+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp -@@ -69,6 +69,9 @@ - #include "llvm/Transforms/Utils/BasicBlockUtils.h" - #include "llvm/Transforms/Utils/Local.h" - #include "llvm/Transforms/Utils/LoopUtils.h" -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - using namespace llvm; - - #define DEBUG_TYPE "loop-simplify" -@@ -793,6 +796,11 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } - /// it in any convenient order) inserting preheaders... - /// - bool LoopSimplify::runOnFunction(Function &F) { -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled() && skipFunction(F)) -+ return false; -+#endif -+ - bool Changed = false; - LoopInfo *LI = &getAnalysis().getLoopInfo(); - DominatorTree *DT = &getAnalysis().getDomTree(); -diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp -index 511dd61308f9..2d2c3e50514b 100644 ---- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp -+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp -@@ -69,6 +69,9 @@ - #include - #include - #include -+#if defined(ENABLE_AUTOTUNER) -+#include "llvm/AutoTuner/AutoTuning.h" -+#endif - - namespace llvm { - class DataLayout; -diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt -index 998dfd956575..f2c5c04abb13 100644 ---- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt -+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt -@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMVectorize - - LINK_COMPONENTS - Analysis -+ AutoTuner - Core - Support - TransformUtils -diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp -index f923f0be6621..f13ce6853666 100644 ---- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp -+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp -@@ -113,6 +113,18 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, - // Populate values with existing loop metadata. - getHintsFromMetadata(); - -+#if defined(ENABLE_AUTOTUNER) -+ if (autotuning::Engine.isEnabled()) { -+ int NewValue = 0; -+ bool VectorizationInterleaveChanged = -+ L->lookUpParams("VectorizationInterleave", NewValue); -+ -+ if (VectorizationInterleaveChanged) { -+ Interleave.Value = NewValue; -+ } -+ } -+#endif -+ - // force-vector-interleave overrides DisableInterleaving. - if (VectorizerParams::isInterleaveForced()) - Interleave.Value = VectorizerParams::VectorizationInterleave; -diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp -index b603bbe55dc9..46fab860f5a3 100644 ---- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp -+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp -@@ -10178,6 +10178,22 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) - VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced || - !EnableLoopVectorization) {} - -+#if defined(ENABLE_AUTOTUNER) -+// Given the iterleave count (IC) and CR, compute the dynamic values for -+// interleave count. Then add it to CR. -+static void -+computeAutoTunerDynamicInterleaveOptions(unsigned IC, -+ const autotuning::CodeRegion &CR) { -+ -+ std::vector AutoTunerOptions{1, 2, 4}; -+ if (std::find(AutoTunerOptions.begin(), AutoTunerOptions.end(), IC) == -+ AutoTunerOptions.end()) -+ AutoTunerOptions[2] = IC; -+ -+ CR.addAutoTunerOptions("VectorizationInterleave", AutoTunerOptions); -+} -+#endif -+ - bool LoopVectorizePass::processLoop(Loop *L) { - assert((EnableVPlanNativePath || L->isInnermost()) && - "VPlan-native path is not enabled. Only process inner loops."); -@@ -10190,6 +10206,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { - << L->getHeader()->getParent()->getName() << "' from " - << DebugLocStr << "\n"); - -+#if defined(ENABLE_AUTOTUNER) -+ // Initialize the loop for auto-tuning but do not add it -+ // as an tuning opportunity yet. -+ autotuning::Engine.initContainer( -+ L, LV_NAME, L->getHeader()->getParent()->getName(), false); -+#endif - LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); - - LLVM_DEBUG( -@@ -10422,6 +10444,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { - InterleaveLoop = false; - } - -+#if defined(ENABLE_AUTOTUNER) -+ if (!VectorizerParams::isInterleaveForced()) { -+ // Compute the dynamic values for VectorizationInterleave and add it to the -+ // CodeRegion. -+ computeAutoTunerDynamicInterleaveOptions(IC, L->getCodeRegion()); -+ -+ // Add the current loop as a tuning opportunity explicitly. -+ autotuning::Engine.addOpportunity( -+ L->getCodeRegion(), {{"VectorizationInterleave", std::to_string(IC)}}); -+ } -+#endif -+ - // Override IC if user provided an interleave count. - IC = UserIC > 0 ? UserIC : IC; - -diff --git a/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml -new file mode 100644 -index 000000000000..f483a269906a ---- /dev/null -+++ b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml -@@ -0,0 +1,8 @@ -+--- !AutoTuning -+Pass: loop-unroll -+Name: [name] -+Function: foo -+CodeRegionType: loop -+Args: -+ - UnrollCount: [number] -+... -diff --git a/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll -new file mode 100644 -index 000000000000..ceb9b4fb2ca6 ---- /dev/null -+++ b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll -@@ -0,0 +1,65 @@ -+; UNSUPPORTED: windows -+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' \ -+; RUN: %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml -+; RUN: opt --disable-output %s -S -passes='require' \ -+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 -+; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s -+; RUN: rm -rf %T/../autotune_datadir/* -+ -+; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml -+; RUN: opt %s -S -passes='require' -auto-tuning-config-id=1 -+; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s -+; RUN: rm -rf %T/../autotune_datadir/* -+ -+; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml -+; RUN: opt %s -S -passes='require' -enable-autotuning-dump -+; RUN: echo -n %T/../autotune_datadir/IR_files/ > %t.filename -+; RUN: echo -n "create-data-dir.ll/" >> %t.filename -+; RUN: echo -n %s | sed 's#/#_#g' >> %t.filename -+; RUN: echo -n ".ll" >> %t.filename -+; RUN: cat %t.filename | xargs cat | FileCheck %s -+; RUN: rm -rf %T/../autotune_datadir -+ -+; ModuleID = 'search.c' -+source_filename = "search.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: argmemonly nofree norecurse nosync nounwind readonly uwtable -+define dso_local i32 @search(ptr nocapture noundef readonly %Arr, i32 noundef %Value, i32 noundef %Size) { -+entry: -+ %cmp5 = icmp sgt i32 %Size, 0 -+ br i1 %cmp5, label %for.body.preheader, label %for.end -+ -+for.body.preheader: ; preds = %entry -+ %wide.trip.count = zext i32 %Size to i64 -+ br label %for.body -+ -+for.body: ; preds = %for.body.preheader, %for.inc -+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] -+ %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv -+ %0 = load i32, ptr %arrayidx, align 4 -+ %cmp1 = icmp eq i32 %0, %Value -+ br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc -+ -+for.inc: ; preds = %for.body -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count -+ br i1 %exitcond.not, label %for.end, label %for.body -+ -+for.end.loopexit.split.loop.exit: ; preds = %for.body -+ %1 = trunc i64 %indvars.iv to i32 -+ br label %for.end -+ -+for.end: ; preds = %for.inc, %for.end.loopexit.split.loop.exit, %entry -+ %Idx.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit.split.loop.exit ], [ %Size, %for.inc ] -+ ret i32 %Idx.0.lcssa -+} -+ -+; Check that only loop body is inside the IR File. -+; CHECK-LABEL: for.body: ; preds = -+; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] -+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv -+; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 -+; CHECK-NEXT: %cmp1 = icmp eq i32 %0, %Value -+; CHECK-NEXT: br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc -diff --git a/llvm/test/AutoTuning/AutotuningDump/unroll.ll b/llvm/test/AutoTuning/AutotuningDump/unroll.ll -new file mode 100644 -index 000000000000..e8243da55fff ---- /dev/null -+++ b/llvm/test/AutoTuning/AutotuningDump/unroll.ll -@@ -0,0 +1,35 @@ -+; RUN: rm -rf %T.tmp/Output -+; RUN: mkdir -p %T.tmp/Output -+; RUN: rm %t.DEFAULT.yaml -rf -+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml -+; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ -+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 -+; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ -+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=2 -+; RUN: cat %T.tmp/Output/unroll.ll/1.ll | FileCheck %s -check-prefix=DEFAULT -+; RUN: cat %T.tmp/Output/unroll.ll/2.ll | FileCheck %s -check-prefix=DEFAULT -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+for.end: ; preds = %for.body -+ ret void -+} -+; Check that only loop body is inside the IR File. -+; DEFAULT-LABEL: for.body: ; preds = %for.body, %entry -+; DEFAULT-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+; DEFAULT-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv -+; DEFAULT: %exitcond = icmp eq i64 %indvars.iv.next, 64 -+; DEFAULT: br i1 %exitcond, label %for.end, label %for.body -+ -+; RUN: rm -rf %T.tmp/Output -diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml -new file mode 100644 -index 000000000000..a5e669c17a71 ---- /dev/null -+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml -@@ -0,0 +1,9 @@ -+!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, -+ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, -+ Pass: loop-unroll} -+--- !AutoTuning {Args: [{VectorizationInterleave: 2}], -+ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, -+ Invocation: 0, Name: for.body, Pass: loop-vectorize} -+--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, -+ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, -+ Pass: loop-unroll} -diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml -new file mode 100644 -index 000000000000..738cf55ffe9a ---- /dev/null -+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml -@@ -0,0 +1,9 @@ -+!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 12835463591102937421, -+ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, -+ Pass: loop-unroll} -+--- !AutoTuning {Args: [{VectorizationInterleave: 2}], -+ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, -+ Invocation: 0, Name: for.body, Pass: loop-vectorize} -+--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, -+ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, -+ Pass: loop-unroll} -diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll -new file mode 100644 -index 000000000000..667a076b2d23 ---- /dev/null -+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll -@@ -0,0 +1,117 @@ -+; ModuleID = 'test.c' -+source_filename = "test.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+@.str = private unnamed_addr constant [12 x i8] c"tmp <= 10.0\00", align 1 -+@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 -+@__PRETTY_FUNCTION__.test = private unnamed_addr constant [12 x i8] c"void test()\00", align 1 -+ -+; Function Attrs: nounwind uwtable -+define dso_local void @test() #0 { -+entry: -+ %cs = alloca i32, align 4 -+ %flush = alloca ptr, align 8 -+ %i = alloca i32, align 4 -+ %tmp = alloca double, align 8 -+ call void @llvm.lifetime.start.p0(i64 4, ptr %cs) #5 -+ store i32 16431360, ptr %cs, align 4, !tbaa !6 -+ call void @llvm.lifetime.start.p0(i64 8, ptr %flush) #5 -+ %0 = load i32, ptr %cs, align 4, !tbaa !6 -+ %conv = sext i32 %0 to i64 -+ %call = call noalias ptr @calloc(i64 noundef %conv, i64 noundef 8) #6 -+ store ptr %call, ptr %flush, align 8, !tbaa !10 -+ call void @llvm.lifetime.start.p0(i64 4, ptr %i) #5 -+ call void @llvm.lifetime.start.p0(i64 8, ptr %tmp) #5 -+ store double 0.000000e+00, ptr %tmp, align 8, !tbaa !12 -+ store i32 0, ptr %i, align 4, !tbaa !6 -+ br label %for.cond -+ -+for.cond: ; preds = %for.inc, %entry -+ %1 = load i32, ptr %i, align 4, !tbaa !6 -+ %2 = load i32, ptr %cs, align 4, !tbaa !6 -+ %cmp = icmp slt i32 %1, %2 -+ br i1 %cmp, label %for.body, label %for.end -+ -+for.body: ; preds = %for.cond -+ %3 = load ptr, ptr %flush, align 8, !tbaa !10 -+ %4 = load i32, ptr %i, align 4, !tbaa !6 -+ %idxprom = sext i32 %4 to i64 -+ %arrayidx = getelementptr inbounds double, ptr %3, i64 %idxprom -+ %5 = load double, ptr %arrayidx, align 8, !tbaa !12 -+ %6 = load double, ptr %tmp, align 8, !tbaa !12 -+ %add = fadd double %6, %5 -+ store double %add, ptr %tmp, align 8, !tbaa !12 -+ br label %for.inc -+ -+for.inc: ; preds = %for.body -+ %7 = load i32, ptr %i, align 4, !tbaa !6 -+ %inc = add nsw i32 %7, 1 -+ store i32 %inc, ptr %i, align 4, !tbaa !6 -+ br label %for.cond, !llvm.loop !14 -+ -+for.end: ; preds = %for.cond -+ %8 = load double, ptr %tmp, align 8, !tbaa !12 -+ %cmp2 = fcmp ole double %8, 1.000000e+01 -+ br i1 %cmp2, label %if.then, label %if.else -+ -+if.then: ; preds = %for.end -+ br label %if.end -+ -+if.else: ; preds = %for.end -+ call void @__assert_fail(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 11, ptr noundef @__PRETTY_FUNCTION__.test) #7 -+ unreachable -+ -+if.end: ; preds = %if.then -+ %9 = load ptr, ptr %flush, align 8, !tbaa !10 -+ call void @free(ptr noundef %9) #5 -+ call void @llvm.lifetime.end.p0(i64 8, ptr %tmp) #5 -+ call void @llvm.lifetime.end.p0(i64 4, ptr %i) #5 -+ call void @llvm.lifetime.end.p0(i64 8, ptr %flush) #5 -+ call void @llvm.lifetime.end.p0(i64 4, ptr %cs) #5 -+ ret void -+} -+ -+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 -+ -+; Function Attrs: nounwind allocsize(0,1) -+declare noalias ptr @calloc(i64 noundef, i64 noundef) #2 -+ -+; Function Attrs: noreturn nounwind -+declare void @__assert_fail(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3 -+ -+; Function Attrs: nounwind -+declare void @free(ptr noundef) #4 -+ -+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 -+ -+attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } -+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -+attributes #2 = { nounwind allocsize(0,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } -+attributes #3 = { noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } -+attributes #4 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } -+attributes #5 = { nounwind } -+attributes #6 = { nounwind allocsize(0,1) } -+attributes #7 = { noreturn nounwind } -+ -+!llvm.module.flags = !{!0, !1, !2, !3, !4} -+!llvm.ident = !{!5} -+ -+!0 = !{i32 1, !"wchar_size", i32 4} -+!1 = !{i32 8, !"PIC Level", i32 2} -+!2 = !{i32 7, !"PIE Level", i32 2} -+!3 = !{i32 7, !"uwtable", i32 2} -+!4 = !{i32 7, !"frame-pointer", i32 1} -+!5 = !{!"Huawei BiSheng Compiler clang version 18.0.0 (ssh://git@codehub-dg-y.huawei.com:2222/CompilerKernel/BiShengKernel/BiSheng.git 026024071a7fb66b26b65fb81da702cc5f0cf405)"} -+!6 = !{!7, !7, i64 0} -+!7 = !{!"int", !8, i64 0} -+!8 = !{!"omnipotent char", !9, i64 0} -+!9 = !{!"Simple C/C++ TBAA"} -+!10 = !{!11, !11, i64 0} -+!11 = !{!"any pointer", !8, i64 0} -+!12 = !{!13, !13, i64 0} -+!13 = !{!"double", !8, i64 0} -+!14 = distinct !{!14, !15} -+!15 = !{!"llvm.loop.mustprogress"} -diff --git a/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll -new file mode 100644 -index 000000000000..f905208a2f3b ---- /dev/null -+++ b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll -@@ -0,0 +1,11 @@ -+; The purpose is to test the baseline IR is the same as the 1st iteration of -+; autotuning process with --use-baseline-config enabled. -+; RUN: rm %t.baseline %t.firstIt -f -+; RUN: opt -O3 %S/Inputs/test.ll -o %t.baseline -+; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_baseline \ -+; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline_config.yaml -+; RUN: cmp %t.firstIt_baseline %t.baseline -+ -+; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_random \ -+; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random_config.yaml -+; RUN: not cmp %t.firstIt_random %t.baseline -diff --git a/llvm/test/AutoTuning/BaselineConfig/opp.ll b/llvm/test/AutoTuning/BaselineConfig/opp.ll -new file mode 100644 -index 000000000000..b2897316fc22 ---- /dev/null -+++ b/llvm/test/AutoTuning/BaselineConfig/opp.ll -@@ -0,0 +1,67 @@ -+; REQUIRES: asserts -+; RUN: rm %t.callsite_opp -rf -+; RUN: opt %s -O3 -debug-only=inline -disable-output -S 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=DEFAULT -+; RUN: opt %s -O3 -auto-tuning-opp=%t.callsite_opp -disable-output -S 2>&1 -+; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=AUTOTUNE -+ -+@a = global i32 4 -+ -+; Function Attrs: nounwind readnone uwtable -+define i32 @simpleFunction(i32 %a) #0 { -+entry: -+ call void @extern() -+ %a1 = load volatile i32, i32* @a -+ %x1 = add i32 %a1, %a1 -+ %a2 = load volatile i32, i32* @a -+ %x2 = add i32 %x1, %a2 -+ %a3 = load volatile i32, i32* @a -+ %x3 = add i32 %x2, %a3 -+ %a4 = load volatile i32, i32* @a -+ %x4 = add i32 %x3, %a4 -+ %a5 = load volatile i32, i32* @a -+ %x5 = add i32 %x4, %a5 -+ %a6 = load volatile i32, i32* @a -+ %x6 = add i32 %x5, %a6 -+ %a7 = load volatile i32, i32* @a -+ %x7 = add i32 %x6, %a6 -+ %a8 = load volatile i32, i32* @a -+ %x8 = add i32 %x7, %a8 -+ %a9 = load volatile i32, i32* @a -+ %x9 = add i32 %x8, %a9 -+ %a10 = load volatile i32, i32* @a -+ %x10 = add i32 %x9, %a10 -+ %a11 = load volatile i32, i32* @a -+ %x11 = add i32 %x10, %a11 -+ %a12 = load volatile i32, i32* @a -+ %x12 = add i32 %x11, %a12 -+ %add = add i32 %x12, %a -+ ret i32 %add -+} -+ -+; Function Attrs: nounwind readnone uwtable -+define i32 @bar(i32 %a) #0 { -+entry: -+ %0 = tail call i32 @simpleFunction(i32 6) -+ ret i32 %0 -+} -+ -+declare void @extern() -+ -+attributes #0 = { nounwind readnone uwtable } -+attributes #1 = { nounwind cold readnone uwtable } -+ -+ -+; NOTE: Need to make sure the function inling have the same behaviour as O3 and -+; 'BaselineConfig' -+; DEFAULT: Inlining calls in: bar -+; DEFAULT: Inlining (cost=115, threshold=375), Call: %0 = tail call i32 @simpleFunction(i32 6) -+ -+; AUTOTUNE: Pass: inline -+; AUTOTUNE-NEXT: Name: simpleFunction -+; AUTOTUNE-NEXT: Function: bar -+; AUTOTUNE-NEXT: CodeRegionType: callsite -+; AUTOTUNE-NEXT: CodeRegionHash: {{[0-9]+}} -+; AUTOTUNE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } -+; AUTOTUNE-NEXT: BaselineConfig: { ForceInline: '1' } -+; AUTOTUNE-NEXT: Invocation: 0 -diff --git a/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll -new file mode 100644 -index 000000000000..13acafae6fc4 ---- /dev/null -+++ b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll -@@ -0,0 +1,62 @@ -+; REQUIRES: asserts -+ -+; RUN: rm -rf %t.filter -+; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ -+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop --disable-output -+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=DEFAULT -+ -+; RUN: rm -rf %t.filter -+; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ -+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ -+; RUN: -auto-tuning-function-filter=foo --disable-output -+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_FOO -+ -+; RUN: rm -rf %t.filter -+; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ -+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ -+; RUN: -auto-tuning-function-filter=bar --disable-output -+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_BAR -+ -+; RUN: rm -rf %t.filter -+; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ -+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ -+; RUN: -auto-tuning-function-filter=dummy -debug-only=autotuning | \ -+; RUN: FileCheck %s -check-prefix=FILTER_DUMMY -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+define void @bar(i32* nocapture %a) { -+entry: -+ call void @foo(i32* %a) -+ ret void -+} -+ -+; DEFAULT: --- !AutoTuning -+; DEFAULT: --- !AutoTuning -+ -+; FILTER_FOO: --- !AutoTuning -+; FILTER_FOO: Function: foo -+; FILTER_FOO-NOT: --- !AutoTuning -+ -+; FILTER_BAR: --- !AutoTuning -+; FILTER_BAR: Function: bar -+; FILTER_BAR-NOT: --- !AutoTuning -+ -+; FILTER_DUMMY-NOT: --- !AutoTuning -+; FILTER_DUMMY-NOT: --- !AutoTuning -diff --git a/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml -new file mode 100644 -index 000000000000..9c203e58f0ab ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml -@@ -0,0 +1,3 @@ -+ -+ this is a xml file -+ -diff --git a/llvm/test/AutoTuning/Error/Inputs/template.yaml b/llvm/test/AutoTuning/Error/Inputs/template.yaml -new file mode 100644 -index 000000000000..1f02b52ffb38 ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/Inputs/template.yaml -@@ -0,0 +1,10 @@ -+--- !AutoTuning -+Pass: pass -+Name: for.body -+Function: foo -+CodeRegionType: loop -+CodeRegionHash: 0 -+Args: -+ - UnrollCount: 2 -+ - PassOrder: [test, test2] -+... -diff --git a/llvm/test/AutoTuning/Error/file-not-found-error.ll b/llvm/test/AutoTuning/Error/file-not-found-error.ll -new file mode 100644 -index 000000000000..6a364239a271 ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/file-not-found-error.ll -@@ -0,0 +1,29 @@ -+; RUN: rm %t.non-existing.yaml -rf -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.non-existing.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR -+ -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; check if error massage is shown properly when input yaml is not found -+; -+; ERROR: Error parsing auto-tuning input. -+; ERROR: No such file or directory -diff --git a/llvm/test/AutoTuning/Error/invalid-yaml-error.ll b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll -new file mode 100644 -index 000000000000..bfc8784c4ea4 ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll -@@ -0,0 +1,27 @@ -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%S/Inputs/invalid-format.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR -+ -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; check if error massage is shown properly when input yaml is in invalid format -+; -+; ERROR: error: YAML:1:1: error: document root is not of mapping type. -diff --git a/llvm/test/AutoTuning/Error/malformed-input-error.ll b/llvm/test/AutoTuning/Error/malformed-input-error.ll -new file mode 100644 -index 000000000000..0b73c3195503 ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/malformed-input-error.ll -@@ -0,0 +1,136 @@ -+; Check if error messages are shown properly for malformed YAML files. -+ -+; Missing Pass Field -+; RUN: rm %t.missing-pass.yaml -rf -+; RUN: sed 's#Pass: pass##g' %S/Inputs/template.yaml > %t.missing-pass.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-pass.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-FIELD -+ -+; Missing Pass Value -+; RUN: rm %t.missing-value-pass.yaml -rf -+; RUN: sed 's#pass##g' %S/Inputs/template.yaml > %t.missing-value-pass.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-value-pass.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-PASS-VALUE -+ -+; Missing Name Field -+; RUN: rm %t.missing-name.yaml -rf -+; RUN: sed 's#Name: for.body##g' %S/Inputs/template.yaml > %t.missing-name.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-name.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-NAME-FIELD -+ -+; Missing Name Value -+; RUN: rm %t.missing-value-name.yaml -rf -+; RUN: sed 's#for.body##g' %S/Inputs/template.yaml > %t.missing-value-name.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-value-name.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-NAME-VALUE -+ -+; Missing Function Field -+; RUN: rm %t.missing-function.yaml -rf -+; RUN: sed 's#Function: foo##g' %S/Inputs/template.yaml > %t.missing-function.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' -auto-tuning-input=%t.missing-function.yaml 2>&1 | FileCheck %s -check-prefix=ERROR-FUNCTION-FIELD -+ -+; Missing Function Value -+; RUN: rm %t.missing-value-func.yaml -rf -+; RUN: sed 's#foo##g' %S/Inputs/template.yaml > %t.missing-value-func.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-value-func.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-FUNC-VALUE -+ -+; Missing CodeRegionType Field -+; RUN: rm %t.missing-type.yaml -rf -+; RUN: sed 's#CodeRegionType: loop##g' %S/Inputs/template.yaml > %t.missing-type.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-type.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-FIELD -+ -+; Missing CodeRegionType Value -+; RUN: rm %t.missing-value-type.yaml -rf -+; RUN: sed 's#loop##g' %S/Inputs/template.yaml > %t.missing-value-type.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-value-type.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-VALUE -+ -+; Invalid CodeRegionType Value -+; RUN: rm %t.invalid-value-type.yaml -rf -+; RUN: sed 's#loop#error-type#g' %S/Inputs/template.yaml > %t.invalid-value-type.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.invalid-value-type.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-INVALID -+ -+; Missing Param Name -+; RUN: rm %t.missing-param-name.yaml -rf -+; RUN: sed 's#UnrollCount##g' %S/Inputs/template.yaml > %t.missing-param-name.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-param-name.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-PARAM-NAME -+ -+; Missing Param Value -+; RUN: rm %t.missing-value-param.yaml -rf -+; RUN: sed 's#2##g' %S/Inputs/template.yaml > %t.missing-value-param.yaml -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.missing-value-param.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=ERROR-PARAM-VALUE -+ -+; Empty Param List -+; RUN: rm %t.empty-value-param-list.yaml -rf -+; RUN: sed 's#\[test, test2\]#\[\]#g' %S/Inputs/template.yaml > %t.empty-value-param-list.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.empty-value-param-list.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=VALID -+ -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; check if error massage is shown properly for malformed YAML input files. -+; -+ -+; ERROR-FIELD: error: CodeRegionHash, CodeRegionType, or Pass missing. -+ -+; ERROR-NAME-FIELD: error: Remark Name expected; enable -autotuning-omit-metadata. -+ -+; ERROR-FUNCTION-FIELD: error: Remark Function Name expected; enable -autotuning-omit-metadata. -+ -+; ERROR-PASS-VALUE: error: YAML:2:1: error: expected a value of scalar type. -+; ERROR-PASS-VALUE: Pass: -+ -+; ERROR-NAME-VALUE: error: YAML:3:1: error: expected a value of scalar type. -+; ERROR-NAME-VALUE: Name: -+ -+; ERROR-FUNC-VALUE: error: YAML:4:1: error: expected a value of scalar type. -+; ERROR-FUNC-VALUE: Function: -+ -+; ERROR-CODE-REGION-TYPE-FIELD: CodeRegionHash, CodeRegionType, or Pass missing. -+ -+; ERROR-CODE-REGION-TYPE-VALUE: error: YAML:5:1: error: expected a value of scalar type. -+; ERROR-CODE-REGION-TYPE-VALUE: CodeRegionType: -+ -+; ERROR-CODE-REGION-TYPE-INVALID: Unsupported CodeRegionType:error-type -+ -+; ERROR-PARAM-NAME: error: YAML:8:5: error: argument key is missing. -+; ERROR-PARAM-NAME: - : 2 -+ -+; ERROR-PARAM-VALUE: error: YAML:8:5: error: expected a value of scalar type. -+; ERROR-PARAM-VALUE: - UnrollCount: -+ -+; VALID-NOT: -auto-tuning-input=(input file) option failed. -diff --git a/llvm/test/AutoTuning/Error/output-error.ll b/llvm/test/AutoTuning/Error/output-error.ll -new file mode 100644 -index 000000000000..61ffba50924b ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/output-error.ll -@@ -0,0 +1,28 @@ -+; RUN: rm %t.opp -rf; touch %t.opp -+; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-opp=%t.opp 2>&1 | FileCheck %s -check-prefix=ERROR-OPP -+ -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; check if error massage is shown properly when output files cannot be created -+; -+; ERROR-OPP: Error generating auto-tuning opportunities. -+; ERROR-OPP: error: Not a directory -diff --git a/llvm/test/AutoTuning/Error/valid-input.ll b/llvm/test/AutoTuning/Error/valid-input.ll -new file mode 100644 -index 000000000000..dae90cdbe408 ---- /dev/null -+++ b/llvm/test/AutoTuning/Error/valid-input.ll -@@ -0,0 +1,27 @@ -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%S/Inputs/template.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=VALID -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; check if error massage is shown properly when the input is valid -+; -+ -+; VALID-NOT: -auto-tuning-input=(input file) option failed. -diff --git a/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml -new file mode 100644 -index 000000000000..a7d390be63e7 ---- /dev/null -+++ b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml -@@ -0,0 +1,9 @@ -+--- !AutoTuning -+Pass: [dummy-pass] -+CodeRegionType: [dummy-type] -+Name: foo -+DebugLoc: { File: [dummy-file], Line: 0, Column: 0 } -+Function: foo -+CodeRegionHash: 0 -+Invocation: 0 -+... -diff --git a/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll -new file mode 100644 -index 000000000000..b9dc81089d40 ---- /dev/null -+++ b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll -@@ -0,0 +1,103 @@ -+; REQUIRES: asserts -+; RUN: rm %t.output -rf -+; RUN: rm %t.inc_compile.yaml -rf -+; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml -+; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml -+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml -+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ -+; RUN: -auto-tuning-compile-mode=CoarseGrain -print-after-all \ -+; RUN: -debug-only=autotuning-compile \ -+; RUN: -o %t.output 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=COARSEGRAIN -+ -+; RUN: rm %t.output -rf -+; RUN: rm %t.inc_compile.yaml -rf -+; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml -+; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml -+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml -+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ -+; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ -+; RUN: -debug-only=autotuning-compile \ -+; RUN: -o %t.output 2>&1 | \ -+; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-INLINE -+ -+; RUN: rm %t.output -rf -+; RUN: rm %t.inc_compile.yaml -rf -+; RUN: sed 's#\[dummy-pass\]#loop-unroll#g' %S/Inputs/template.yaml > %t.temp.yaml -+; RUN: sed 's#\[dummy-type\]#loop#g' %t.temp.yaml > %t.temp2.yaml -+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml -+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ -+; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ -+; RUN: -debug-only=autotuning-compile \ -+; RUN: -o %t.output 2>&1 | \ -+; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-2,FINEGRAIN-UNROLL -+ -+; ModuleID = 'test.c' -+source_filename = "test.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable -+define dso_local i32 @test(i32* nocapture noundef %a, i32* nocapture noundef readonly %b, i32 noundef %size) local_unnamed_addr #0 { -+entry: -+ %cmp11 = icmp sgt i32 %size, 0 -+ br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup -+ -+for.body.preheader: ; preds = %entry -+ %wide.trip.count = zext i32 %size to i64 -+ br label %for.body -+ -+for.cond.cleanup: ; preds = %for.body, %entry -+ ret i32 undef -+ -+for.body: ; preds = %for.body.preheader, %for.body -+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %1 = load i32, i32* %arrayidx2, align 4 -+ %add = add nsw i32 %1, %0 -+ store i32 %add, i32* %arrayidx2, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count -+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body -+} -+ -+attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} -+!llvm.ident = !{!9} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 1, !"branch-target-enforcement", i32 0} -+!6 = !{i32 1, !"sign-return-address", i32 0} -+!7 = !{i32 1, !"sign-return-address-all", i32 0} -+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)"} -+!10 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!11 = !DISubroutineType(types: !2) -+!12 = !DILocation(line: 2, column: 5, scope: !10) -+ -+; COARSEGRAIN: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start -+; COARSEGRAIN-NEXT: AutoTuningCompile: No change in opt pipeline for Basic/CoarseGrain incremental compilation mode. -+; COARSEGRAIN-NOT: Skip pass {{.*}}: True -+ -+; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start -+; FINEGRAIN-1-NEXT: AutoTuningCompile: SkipPasses enabled. -+; FINEGRAIN-1-NOT: Skip pass {{.*}}: False -+; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: inline -+; FINEGRAIN-INLINE: AutoTuningCompile: SkipPasses disabled. -+; FINEGRAIN-INLINE: Skip pass 'InlinerPass': False -+; FINEGRAIN-INLINE-NEXT: *** IR Dump After InlinerPass -+; FINEGRAIN-INLINE-NOT: Skip pass {{.*}}: True -+ -+; FINEGRAIN-2: AutoTuningCompile: Old decision (SkipPasses = True ) continued. -+; FINEGRAIN-2-NOT: Skip pass {{.*}}: False -+; FINEGRAIN-2: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: loop-unroll -+; FINEGRAIN-UNROLL: AutoTuningCompile: SkipPasses disabled. -+; FINEGRAIN-UNROLL-NOT: Skip pass {{.*}}: True -diff --git a/llvm/test/AutoTuning/Inline/Inputs/template.yaml b/llvm/test/AutoTuning/Inline/Inputs/template.yaml -new file mode 100644 -index 000000000000..e04612183d1f ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/Inputs/template.yaml -@@ -0,0 +1,9 @@ -+--- !AutoTuning -+Pass: inline -+Name: simpleFunction-entry -+Function: bar -+CodeRegionType: callsite -+CodeRegionHash: 5550568187071847048 -+Args: -+ - ForceInline: [force-inline] -+... -diff --git a/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml -new file mode 100644 -index 000000000000..9fc88f56d6bc ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml -@@ -0,0 +1,7 @@ -+--- !AutoTuning -+Pass: inline -+CodeRegionType: callsite -+CodeRegionHash: 5550568187071847048 -+Args: -+ - ForceInline: [force-inline] -+... -diff --git a/llvm/test/AutoTuning/Inline/duplicate-calls.ll b/llvm/test/AutoTuning/Inline/duplicate-calls.ll -new file mode 100644 -index 000000000000..ad32262ad044 ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/duplicate-calls.ll -@@ -0,0 +1,96 @@ -+; RUN: rm %t.duplicate_calls -rf -+; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.duplicate_calls \ -+; RUN: -auto-tuning-type-filter=CallSite --disable-output -+; RUN: FileCheck %s --input-file %t.duplicate_calls/duplicate-calls.ll.yaml -+ -+; ModuleID = 'duplicate-calls.c' -+source_filename = "duplicate-calls.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: nounwind uwtable -+define dso_local void @bar(i32* nocapture %result, i32* %cfb, i32 %bytes) local_unnamed_addr #0 !dbg !10 { -+entry: -+ %call = tail call i32 @test(i32* %cfb, i32 %bytes) #1, !dbg !12 -+ store i32 %call, i32* %result, align 4, !dbg !13, !tbaa !14 -+ ret void, !dbg !18 -+} -+ -+declare dso_local i32 @test(i32*, i32) local_unnamed_addr #0 -+ -+; Function Attrs: nounwind uwtable -+define dso_local void @foo(i32* %cfb, i32* readnone %saved, i32* nocapture %result, i32 %bytes) local_unnamed_addr #0 !dbg !19 { -+entry: -+ %tobool.not = icmp eq i32* %cfb, null, !dbg !20 -+ br i1 %tobool.not, label %if.else, label %if.then.split, !dbg !20 -+ -+if.then.split: ; preds = %entry -+ tail call void @bar(i32* %result, i32* nonnull %cfb, i32 %bytes), !dbg !21 -+ br label %return, !dbg !22 -+ -+if.else: ; preds = %entry -+ %tobool1.not = icmp eq i32* %saved, null, !dbg !23 -+ br i1 %tobool1.not, label %if.else.split, label %return, !dbg !23 -+ -+if.else.split: ; preds = %if.else -+ tail call void @bar(i32* %result, i32* null, i32 %bytes), !dbg !21 -+ br label %return, !dbg !23 -+ -+return: ; preds = %if.then.split, %if.else.split, %if.else -+ ret void, !dbg !24 -+} -+ -+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { nounwind } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} -+!llvm.ident = !{!9} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "duplicate-calls.c", directory: "/home/m00629332/benchmarks/cBench/source/security_pgp_d/src") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 1, !"branch-target-enforcement", i32 0} -+!6 = !{i32 1, !"sign-return-address", i32 0} -+!7 = !{i32 1, !"sign-return-address-all", i32 0} -+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)"} -+!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!11 = !DISubroutineType(types: !2) -+!12 = !DILocation(line: 10, column: 16, scope: !10) -+!13 = !DILocation(line: 10, column: 14, scope: !10) -+!14 = !{!15, !15, i64 0} -+!15 = !{!"int", !16, i64 0} -+!16 = !{!"omnipotent char", !17, i64 0} -+!17 = !{!"Simple C/C++ TBAA"} -+!18 = !DILocation(line: 14, column: 1, scope: !10) -+!19 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !11, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!20 = !DILocation(line: 22, column: 6, scope: !19) -+!21 = !DILocation(line: 27, column: 2, scope: !19) -+!22 = !DILocation(line: 23, column: 3, scope: !19) -+!23 = !DILocation(line: 24, column: 11, scope: !19) -+!24 = !DILocation(line: 28, column: 1, scope: !19) -+ -+; CHECK: --- !AutoTuning -+; CHECK-NEXT: Pass: inline -+; CHECK-NEXT: Name: bar-if.then.split -+; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } -+; CHECK-NEXT: Function: foo -+; CHECK-NEXT: CodeRegionType: callsite -+; CHECK-NEXT: CodeRegionHash: -+; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } -+; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } -+; CHECK-NEXT: Invocation: 0 -+; CHECK-NEXT: ... -+; CHECK-NEXT: --- !AutoTuning -+; CHECK-NEXT: Pass: inline -+; CHECK-NEXT: Name: bar-if.else.split -+; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } -+; CHECK-NEXT: Function: foo -+; CHECK-NEXT: CodeRegionType: callsite -+; CHECK-NEXT: CodeRegionHash: -+; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } -+; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } -+; CHECK-NEXT: Invocation: 0 -diff --git a/llvm/test/AutoTuning/Inline/force-inline.ll b/llvm/test/AutoTuning/Inline/force-inline.ll -new file mode 100644 -index 000000000000..cedfc8df3483 ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/force-inline.ll -@@ -0,0 +1,84 @@ -+; REQUIRES: asserts -+; RUN: opt < %s -passes=inline -debug-only=inline -disable-output -S 2>&1 | FileCheck %s -check-prefix=DEFAULT -+; simpleFunction will be inlined with the default behavior. -+ -+; RUN: rm %t.force-inline.yaml -rf -+; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template.yaml > %t.force-inline.yaml -+; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ -+; RUN: -auto-tuning-input=%t.force-inline.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=FORCE-INLINE -+; Test with ForceInline=true; -+ -+; RUN: rm %t.force-inline.yaml -rf -+; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template_no_metadata.yaml > %t.force-inline.yaml -+; RUN: opt %s -passes=inline -S -auto-tuning-input=%t.force-inline.yaml \ -+; RUN: -debug-only=inline -disable-output -auto-tuning-omit-metadata 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=FORCE-INLINE -+; Test with ForceInline=true; -+ -+; RUN: rm %t.no-inline.yaml -rf -+; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template.yaml > %t.no-inline.yaml -+; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ -+; RUN: -auto-tuning-input=%t.no-inline.yaml 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=NO-INLINE -+; Test with ForceInline=false; -+ -+; RUN: rm %t.no-inline.yaml -rf -+; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template_no_metadata.yaml > %t.no-inline.yaml -+; RUN: opt %s -passes='cgscc(inline)' -debug-only=inline -disable-output -S \ -+; RUN: -auto-tuning-input=%t.no-inline.yaml -auto-tuning-omit-metadata 2>&1 | \ -+; RUN: FileCheck %s -check-prefix=NO-INLINE -+; Test with ForceInline=false; -+ -+@a = global i32 4 -+ -+; Function Attrs: nounwind readnone uwtable -+define i32 @simpleFunction(i32 %a) #0 { -+entry: -+ call void @extern() -+ %a1 = load volatile i32, i32* @a -+ %x1 = add i32 %a1, %a1 -+ %a2 = load volatile i32, i32* @a -+ %x2 = add i32 %x1, %a2 -+ %a3 = load volatile i32, i32* @a -+ %x3 = add i32 %x2, %a3 -+ %a4 = load volatile i32, i32* @a -+ %x4 = add i32 %x3, %a4 -+ %a5 = load volatile i32, i32* @a -+ %x5 = add i32 %x4, %a5 -+ %a6 = load volatile i32, i32* @a -+ %x6 = add i32 %x5, %a6 -+ %a7 = load volatile i32, i32* @a -+ %x7 = add i32 %x6, %a6 -+ %a8 = load volatile i32, i32* @a -+ %x8 = add i32 %x7, %a8 -+ %a9 = load volatile i32, i32* @a -+ %x9 = add i32 %x8, %a9 -+ %a10 = load volatile i32, i32* @a -+ %x10 = add i32 %x9, %a10 -+ %a11 = load volatile i32, i32* @a -+ %x11 = add i32 %x10, %a11 -+ %a12 = load volatile i32, i32* @a -+ %x12 = add i32 %x11, %a12 -+ %add = add i32 %x12, %a -+ ret i32 %add -+} -+ -+; Function Attrs: nounwind readnone uwtable -+define i32 @bar(i32 %a) #0 { -+entry: -+ %0 = tail call i32 @simpleFunction(i32 6) -+ ret i32 %0 -+} -+ -+declare void @extern() -+ -+attributes #0 = { nounwind readnone uwtable } -+attributes #1 = { nounwind cold readnone uwtable } -+ -+; DEFAULT: Inlining (cost=120, threshold=337) -+; DEFAULT-SAME: simpleFunction -+; FORCE-INLINE: Inlining (cost=always): Force inlined by auto-tuning -+; FORCE-INLINE-SAME: simpleFunction -+; NO-INLINE: NOT Inlining (cost=never): Force non-inlined by auto-tuning -+; NO-INLINE-SAME: simpleFunction -diff --git a/llvm/test/AutoTuning/Inline/inline-attribute.ll b/llvm/test/AutoTuning/Inline/inline-attribute.ll -new file mode 100644 -index 000000000000..50f583d0a51e ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/inline-attribute.ll -@@ -0,0 +1,85 @@ -+; RUN: rm %t.inline_opp -rf -+; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.inline_opp -auto-tuning-type-filter=CallSite --disable-output -+; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-1 -+; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-2 -+ -+; ModuleID = 'inline.c' -+source_filename = "inline.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: noinline norecurse nounwind readnone uwtable willreturn -+define dso_local i32 @mul(i32 %a) local_unnamed_addr #0 !dbg !10 { -+entry: -+ %mul = mul nsw i32 %a, %a, !dbg !12 -+ ret i32 %mul, !dbg !13 -+} -+ -+; Function Attrs: alwaysinline nounwind uwtable -+define dso_local i32 @add(i32 %a) local_unnamed_addr #1 !dbg !14 { -+entry: -+ %add = shl nsw i32 %a, 1, !dbg !15 -+ ret i32 %add, !dbg !16 -+} -+ -+; Function Attrs: nounwind uwtable -+define dso_local i32 @inc(i32 %a) local_unnamed_addr #2 !dbg !17 { -+entry: -+ %inc = add nsw i32 %a, 1, !dbg !18 -+ ret i32 %inc, !dbg !19 -+} -+ -+; Function Attrs: nounwind uwtable -+define dso_local i32 @func(i32 %a) local_unnamed_addr #2 !dbg !20 { -+entry: -+ %call = call i32 @add(i32 %a), !dbg !21 -+ %call1 = call i32 @mul(i32 %a), !dbg !22 -+ %add = add nsw i32 %call, %call1, !dbg !23 -+ %call2 = call i32 @inc(i32 %a), !dbg !24 -+ %add3 = add nsw i32 %add, %call2, !dbg !25 -+ ret i32 %add3, !dbg !26 -+} -+ -+attributes #0 = { noinline norecurse nounwind readnone uwtable willreturn "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} -+!llvm.ident = !{!9} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner/ir-hashing") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 1, !"branch-target-enforcement", i32 0} -+!6 = !{i32 1, !"sign-return-address", i32 0} -+!7 = !{i32 1, !"sign-return-address-all", i32 0} -+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -+!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)"} -+!10 = distinct !DISubprogram(name: "mul", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!11 = !DISubroutineType(types: !2) -+!12 = !DILocation(line: 3, column: 13, scope: !10) -+!13 = !DILocation(line: 3, column: 5, scope: !10) -+!14 = distinct !DISubprogram(name: "add", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!15 = !DILocation(line: 8, column: 13, scope: !14) -+!16 = !DILocation(line: 8, column: 5, scope: !14) -+!17 = distinct !DISubprogram(name: "inc", scope: !1, file: !1, line: 11, type: !11, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!18 = !DILocation(line: 12, column: 12, scope: !17) -+!19 = !DILocation(line: 12, column: 5, scope: !17) -+!20 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!21 = !DILocation(line: 16, column: 12, scope: !20) -+!22 = !DILocation(line: 16, column: 19, scope: !20) -+!23 = !DILocation(line: 16, column: 18, scope: !20) -+!24 = !DILocation(line: 16, column: 26, scope: !20) -+!25 = !DILocation(line: 16, column: 25, scope: !20) -+!26 = !DILocation(line: 16, column: 5, scope: !20) -+ -+; TEST-1: Pass: inline -+; TEST-1-NOT: Pass: inline -+ -+; TEST-2: Name: inc -+; TEST-2-NEXT: DebugLoc: { File: test.c, Line: 16, Column: 26 } -+; TEST-2-NEXT: Function: func -+; TEST-2-NEXT: CodeRegionType: callsite -diff --git a/llvm/test/AutoTuning/Inline/opp.ll b/llvm/test/AutoTuning/Inline/opp.ll -new file mode 100644 -index 000000000000..dfe1dac29476 ---- /dev/null -+++ b/llvm/test/AutoTuning/Inline/opp.ll -@@ -0,0 +1,64 @@ -+; RUN: rm %t.callsite_opp -rf -+; RUN: sed 's#\[number\]#25#g; s#\[func_name\]#ColdFunction#g' %S/Inputs/template.yaml > %t.template25.yaml -+; RUN: opt %s -passes=inline -S -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite -+ -+; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=CALLSITE -+ -+@a = global i32 4 -+ -+declare void @extern() -+; Function Attrs: nounwind readnone uwtable -+define i32 @simpleFunction(i32 %a) #1 { -+entry: -+ call void @extern() -+ %a1 = load volatile i32, i32* @a -+ %x1 = add i32 %a1, %a1 -+ %a2 = load volatile i32, i32* @a -+ %x2 = add i32 %x1, %a2 -+ %a3 = load volatile i32, i32* @a -+ %x3 = add i32 %x2, %a3 -+ %a4 = load volatile i32, i32* @a -+ %x4 = add i32 %x3, %a4 -+ %a5 = load volatile i32, i32* @a -+ %x5 = add i32 %x4, %a5 -+ %a6 = load volatile i32, i32* @a -+ %x6 = add i32 %x5, %a6 -+ %a7 = load volatile i32, i32* @a -+ %x7 = add i32 %x6, %a6 -+ %a8 = load volatile i32, i32* @a -+ %x8 = add i32 %x7, %a8 -+ %a9 = load volatile i32, i32* @a -+ %x9 = add i32 %x8, %a9 -+ %a10 = load volatile i32, i32* @a -+ %x10 = add i32 %x9, %a10 -+ %a11 = load volatile i32, i32* @a -+ %x11 = add i32 %x10, %a11 -+ %a12 = load volatile i32, i32* @a -+ %x12 = add i32 %x11, %a12 -+ %add = add i32 %x12, %a -+ ret i32 %add -+} -+ -+define i32 @bar(i32 %a) #0 { -+entry: -+ %0 = tail call i32 @simpleFunction(i32 6) -+ ret i32 %0 -+} -+ -+attributes #0 = { nounwind readnone uwtable } -+attributes #1 = { nounwind cold readnone uwtable } -+ -+; Check if code regions are properly generated as tuning opportunities. -+; CALLSITE: --- !AutoTuning -+; CALLSITE-NEXT: Pass: inline -+; CALLSITE-NEXT: Name: simpleFunction -+; CALLSITE-NEXT: Function: bar -+; CALLSITE-NEXT: CodeRegionType: callsite -+; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} -+; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } -+; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } -+; CALLSITE-NEXT: Invocation: 0 -+; CALLSITE-NEXT: ... -+ -+; Check if external functions are filtered out. -+; EXTERNAL-NOT: Name: extern -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml -new file mode 100644 -index 000000000000..6dc49a1f7dc2 ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml -@@ -0,0 +1,10 @@ -+--- !AutoTuning -+Pass: loop-unroll -+Name: for.cond -+DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } -+Function: foo -+CodeRegionType: loop -+Args: -+ - UnrollCount: [number] -+Invocation: 0 -+... -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml -new file mode 100644 -index 000000000000..4920329dbd4b ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml -@@ -0,0 +1,10 @@ -+# CodeRegionHash is correct for only first code region only. -+!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, -+ DebugLoc: {Column: 8, File: loop-nest.c, Line: 10}, Function: loop_nest, Invocation: 0, -+ Name: for.body6.us, Pass: loop-unroll} -+--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, -+ DebugLoc: {Column: 5, File: loop-nest.c, Line: 9}, Function: loop_nest, Invocation: 0, -+ Name: for.cond4.preheader.us, Pass: loop-unroll} -+--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, -+ DebugLoc: {Column: 3, File: loop-nest.c, Line: 8}, Function: loop_nest, Invocation: 0, -+ Name: for.cond1.preheader, Pass: loop-unroll} -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml -new file mode 100644 -index 000000000000..a90cebbce88f ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml -@@ -0,0 +1,9 @@ -+--- !AutoTuning -+Pass: loop-unroll -+Name: loop -+Function: invariant_backedge_1 -+CodeRegionType: loop -+Args: -+ - UnrollCount: [number] -+Invocation: 0 -+... -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml -new file mode 100644 -index 000000000000..18681a0e2efe ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml -@@ -0,0 +1,10 @@ -+--- !AutoTuning -+Pass: loop-unroll -+Name: label %5 -+Function: main -+CodeRegionType: loop -+CodeRegionHash: [hash] -+Args: -+- UnrollCount: [number] -+Invocation: 1 -+... -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml -new file mode 100644 -index 000000000000..166f877a232e ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml -@@ -0,0 +1,10 @@ -+--- !AutoTuning -+Pass: loop-unroll -+Name: [name] -+Function: foo -+CodeRegionType: loop -+CodeRegionHash: [hash] -+Args: -+ - UnrollCount: [number] -+Invocation: 1 -+... -diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml -new file mode 100644 -index 000000000000..b626473cf782 ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml -@@ -0,0 +1,8 @@ -+--- !AutoTuning -+Pass: loop-unroll -+CodeRegionType: loop -+CodeRegionHash: [hash] -+Args: -+ - UnrollCount: [number] -+Invocation: 1 -+... -diff --git a/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll -new file mode 100644 -index 000000000000..85dd690d01c5 ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll -@@ -0,0 +1,161 @@ -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' | \ -+; RUN: FileCheck %s -check-prefix=DISABLE -+ -+; RUN: rm %t.unroll_debug_loc0.yaml -rf -+; RUN: sed 's#\[number\]#0#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc0.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%t.unroll_debug_loc0.yaml | \ -+; RUN: FileCheck %s -check-prefix=UNROLL0 -+ -+; RUN: rm %t.unroll_debug_loc4.yaml -rf -+; RUN: sed 's#\[number\]#4#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc4.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-code-region-matching-hash=false \ -+; RUN: -auto-tuning-input=%t.unroll_debug_loc4.yaml | \ -+; RUN: FileCheck %s -check-prefix=UNROLL4 -+ -+; RUN: rm %t.unroll4.yaml -rf -+; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.cond#g; s#\[hash\]#11552168367013316892#g;'\ -+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-code-region-matching-hash=false \ -+; RUN: -auto-tuning-input=%t.unroll4.yaml | \ -+; RUN: FileCheck %s -check-prefix=UNROLL4-MISMATCH -+ -+; UNSUPPORTED: windows -+ -+; ModuleID = 'loop-opp.c' -+source_filename = "loop-opp.c" -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+; Function Attrs: noinline nounwind uwtable -+define i32 @foo(i32* %n) #0 !dbg !6 { -+entry: -+ %n.addr = alloca i32*, align 8 -+ %b = alloca i32, align 4 -+ %i = alloca i32, align 4 -+ store i32* %n, i32** %n.addr, align 8 -+ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 -+ call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15 -+ store i32 0, i32* %b, align 4, !dbg !15 -+ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 -+ store i32 0, i32* %i, align 4, !dbg !18 -+ br label %for.cond, !dbg !19 -+ -+for.cond: ; preds = %for.inc, %entry -+ %0 = load i32, i32* %i, align 4, !dbg !20 -+ %1 = load i32*, i32** %n.addr, align 8, !dbg !23 -+ %2 = load i32, i32* %1, align 4, !dbg !24 -+ %cmp = icmp slt i32 %0, %2, !dbg !25 -+ br i1 %cmp, label %for.body, label %for.end, !dbg !26 -+ -+for.body: ; preds = %for.cond -+ %3 = load i32, i32* %b, align 4, !dbg !28 -+ %add = add nsw i32 %3, 1, !dbg !30 -+ store i32 %add, i32* %b, align 4, !dbg !31 -+ br label %for.inc, !dbg !32 -+ -+for.inc: ; preds = %for.body -+ %4 = load i32, i32* %i, align 4, !dbg !33 -+ %inc = add nsw i32 %4, 1, !dbg !33 -+ store i32 %inc, i32* %i, align 4, !dbg !33 -+ br label %for.cond, !dbg !35, !llvm.loop !36 -+ -+for.end: ; preds = %for.cond -+ %5 = load i32, i32* %b, align 4, !dbg !39 -+ ret i32 %5, !dbg !40 -+} -+ -+; Function Attrs: nounwind readnone -+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -+ -+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { nounwind readnone } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4} -+!llvm.ident = !{!5} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -+!1 = !DIFile(filename: "loop-opp.c", directory: "") -+!2 = !{} -+!3 = !{i32 2, !"Dwarf Version", i32 4} -+!4 = !{i32 2, !"Debug Info Version", i32 3} -+!5 = !{!""} -+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) -+!7 = !DISubroutineType(types: !8) -+!8 = !{!9, !10} -+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) -+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) -+!12 = !DIExpression() -+!13 = !DILocation(line: 1, column: 20, scope: !6) -+!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) -+!15 = !DILocation(line: 3, column: 9, scope: !6) -+!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) -+!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) -+!18 = !DILocation(line: 4, column: 14, scope: !17) -+!19 = !DILocation(line: 4, column: 10, scope: !17) -+!20 = !DILocation(line: 4, column: 20, scope: !21) -+!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) -+!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) -+!23 = !DILocation(line: 4, column: 25, scope: !21) -+!24 = !DILocation(line: 4, column: 24, scope: !21) -+!25 = !DILocation(line: 4, column: 22, scope: !21) -+!26 = !DILocation(line: 4, column: 5, scope: !27) -+!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) -+!28 = !DILocation(line: 6, column: 11, scope: !29) -+!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) -+!30 = !DILocation(line: 6, column: 12, scope: !29) -+!31 = !DILocation(line: 6, column: 9, scope: !29) -+!32 = !DILocation(line: 7, column: 5, scope: !29) -+!33 = !DILocation(line: 4, column: 28, scope: !34) -+!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) -+!35 = !DILocation(line: 4, column: 5, scope: !34) -+!36 = distinct !{!36, !37, !38} -+!37 = !DILocation(line: 4, column: 5, scope: !17) -+!38 = !DILocation(line: 7, column: 5, scope: !17) -+!39 = !DILocation(line: 8, column: 12, scope: !6) -+!40 = !DILocation(line: 8, column: 5, scope: !6) -+ -+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled when -+; the input remark contains DebugLoc info. -+; -+; DISABLE-LABEL: @foo( -+; DISABLE: for.cond -+; DISABLE: for.body -+; DISABLE-NOT: for.body.1 -+; DISABLE: for.inc -+; DISABLE-NOT: llvm.loop.unroll.disable -+ -+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled -+; when unroll count explicitly set to be 0. -+; -+; UNROLL0-LABEL: @foo( -+; UNROLL0: for.cond -+; UNROLL0: for.body -+; UNROLL0-NOT: for.body.1 -+; UNROLL0: for.inc -+; UNROLL0-NOT: llvm.loop.unroll.disable -+ -+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 -+; when explicitly requested. -+; -+; UNROLL4-LABEL: @foo( -+; UNROLL4: for.cond -+; UNROLL4: for.body -+; UNROLL4: for.body.1 -+; UNROLL4: for.body.2 -+; UNROLL4: for.body.3 -+; UNROLL4: llvm.loop.unroll.disable -+ -+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled -+; when DebugLoc is missing in the input remark. -+; -+; UNROLL4-MISMATCH-LABEL: @foo( -+; UNROLL4-MISMATCH: for.cond -+; UNROLL4-MISMATCH: for.body -+; UNROLL4-MISMATCH-NOT: for.body.1 -+; UNROLL4-MISMATCH: for.inc -+; UNROLL4-MISMATCH-NOT: llvm.loop.unroll.disable -diff --git a/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll -new file mode 100644 -index 000000000000..414c6ff2d1b0 ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll -@@ -0,0 +1,56 @@ -+; RUN: rm %t.default_opp -rf -+; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-type-filter=Loop \ -+; RUN: -passes='require,loop(loop-unroll-full)' --disable-output -+; RUN: FileCheck %s --input-file %t.default_opp/dynamic_config.ll.yaml -+ -+; Function Attrs: nofree norecurse nounwind uwtable -+define dso_local void @transform(i64* nocapture %W) local_unnamed_addr{ -+entry: -+ br label %for.body -+ -+for.body: ; preds = %entry, %for.body -+ %i.037 = phi i32 [ 16, %entry ], [ %inc, %for.body ] -+ %sub = add nsw i32 %i.037, -3 -+ %idxprom = sext i32 %sub to i64 -+ %arrayidx = getelementptr inbounds i64, i64* %W, i64 %idxprom -+ %0 = load i64, i64* %arrayidx, align 8 -+ %sub1 = add nsw i32 %i.037, -6 -+ %idxprom2 = sext i32 %sub1 to i64 -+ %arrayidx3 = getelementptr inbounds i64, i64* %W, i64 %idxprom2 -+ %1 = load i64, i64* %arrayidx3, align 8 -+ %xor = xor i64 %1, %0 -+ %idxprom4 = zext i32 %i.037 to i64 -+ %arrayidx5 = getelementptr inbounds i64, i64* %W, i64 %idxprom4 -+ store i64 %xor, i64* %arrayidx5, align 8 -+ %inc = add nuw nsw i32 %i.037, 1 -+ %cmp = icmp ult i32 %i.037, 79 -+ br i1 %cmp, label %for.body, label %for.body8.preheader -+ -+for.body8.preheader: ; preds = %for.body -+ br label %for.body8 -+ -+for.body8: ; preds = %for.body8.preheader, %for.body8 -+ %indvars.iv = phi i64 [ 80, %for.body8.preheader ], [ %indvars.iv.next, %for.body8 ] -+ %2 = add nsw i64 %indvars.iv, -4 -+ %arrayidx11 = getelementptr inbounds i64, i64* %W, i64 %2 -+ %3 = load i64, i64* %arrayidx11, align 8 -+ %4 = add nsw i64 %indvars.iv, -5 -+ %arrayidx14 = getelementptr inbounds i64, i64* %W, i64 %4 -+ %5 = load i64, i64* %arrayidx14, align 8 -+ %xor15 = xor i64 %5, %3 -+ %arrayidx17 = getelementptr inbounds i64, i64* %W, i64 %indvars.iv -+ store i64 %xor15, i64* %arrayidx17, align 8 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp ne i64 %indvars.iv.next, 256 -+ br i1 %exitcond, label %for.body8, label %for.end20 -+ -+for.end20: ; preds = %for.body8 -+ ret void -+} -+ -+; CHECK: --- !AutoTuning -+; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] -+; CHECK: ... -+; CHECK-NEXT: --- !AutoTuning -+; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] -+; CHECK: ... -diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll -new file mode 100644 -index 000000000000..7f3e27ca057a ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll -@@ -0,0 +1,136 @@ -+; REQUIRES: asserts -+; CodeRegionHash matches for the first code region only. AutoTuner will find -+; match for one code region when hash matching is enabled. AutoTuner will find -+; match for all three code regions when hash matching is disabl3ed. -+ -+; RUN: rm -rf %t.loop_nest.txt -+; RUN: opt %s -passes='require,loop(loop-unroll-full)' \ -+; RUN: -debug-only=autotuning -auto-tuning-input=%S/Inputs/loop_nest.yaml \ -+; RUN: --disable-output &> %t.loop_nest.txt -+; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ -+; RUN: FileCheck %s -check-prefix=HASH_MATCHING_ENABLED -+ -+; RUN: rm -rf %t.loop_nest.txt -+; RUN: opt %s -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-input=%S/Inputs/loop_nest.yaml -debug-only=autotuning \ -+; RUN: -auto-tuning-code-region-matching-hash=false --disable-output &> %t.loop_nest.txt -+; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ -+; RUN: FileCheck %s -check-prefix=HASH_MATCHING_DISABLED -+ -+; ModuleID = 'loop-nest.c' -+source_filename = "loop-nest.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: nofree norecurse nounwind uwtable -+define dso_local void @loop_nest(i32 %ni, i32 %nj, i32 %nk, i32 %alpha, i32 %beta, i32** nocapture readonly %A, i32** nocapture readonly %B, i32** nocapture readonly %C) local_unnamed_addr #0 !dbg !10 { -+entry: -+ %cmp41 = icmp sgt i32 %ni, 0, !dbg !12 -+ br i1 %cmp41, label %for.cond1.preheader.lr.ph, label %for.end23, !dbg !13 -+ -+for.cond1.preheader.lr.ph: ; preds = %entry -+ %cmp238 = icmp slt i32 %nk, 1 -+ %cmp536 = icmp slt i32 %nj, 1 -+ %wide.trip.count51 = zext i32 %ni to i64, !dbg !12 -+ %wide.trip.count47 = zext i32 %nk to i64 -+ %wide.trip.count = zext i32 %nj to i64 -+ %brmerge = or i1 %cmp238, %cmp536 -+ br label %for.cond1.preheader, !dbg !13 -+ -+for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc21 -+ %indvars.iv49 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next50, %for.inc21 ] -+ br i1 %brmerge, label %for.inc21, label %for.cond4.preheader.us.preheader, !dbg !14 -+ -+for.cond4.preheader.us.preheader: ; preds = %for.cond1.preheader -+ %arrayidx15 = getelementptr inbounds i32*, i32** %C, i64 %indvars.iv49 -+ %arrayidx = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv49 -+ %.pre = load i32*, i32** %arrayidx, align 8, !tbaa !15 -+ %.pre53 = load i32*, i32** %arrayidx15, align 8, !tbaa !15 -+ br label %for.cond4.preheader.us, !dbg !14 -+ -+for.cond4.preheader.us: ; preds = %for.cond4.preheader.us.preheader, %for.cond4.for.inc18_crit_edge.us -+ %indvars.iv45 = phi i64 [ 0, %for.cond4.preheader.us.preheader ], [ %indvars.iv.next46, %for.cond4.for.inc18_crit_edge.us ] -+ %arrayidx8.us = getelementptr inbounds i32, i32* %.pre, i64 %indvars.iv45 -+ %arrayidx10.us = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv45 -+ %0 = load i32*, i32** %arrayidx10.us, align 8, !tbaa !15 -+ br label %for.body6.us, !dbg !19 -+ -+for.body6.us: ; preds = %for.cond4.preheader.us, %for.body6.us -+ %indvars.iv = phi i64 [ 0, %for.cond4.preheader.us ], [ %indvars.iv.next, %for.body6.us ] -+ %1 = load i32, i32* %arrayidx8.us, align 4, !dbg !20, !tbaa !21 -+ %mul.us = mul nsw i32 %1, %alpha, !dbg !23 -+ %arrayidx12.us = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !24 -+ %2 = load i32, i32* %arrayidx12.us, align 4, !dbg !24, !tbaa !21 -+ %mul13.us = mul nsw i32 %mul.us, %2, !dbg !25 -+ %arrayidx17.us = getelementptr inbounds i32, i32* %.pre53, i64 %indvars.iv, !dbg !26 -+ %3 = load i32, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 -+ %add.us = add nsw i32 %3, %mul13.us, !dbg !27 -+ store i32 %add.us, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28 -+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !29 -+ br i1 %exitcond.not, label %for.cond4.for.inc18_crit_edge.us, label %for.body6.us, !dbg !19, !llvm.loop !30 -+ -+for.cond4.for.inc18_crit_edge.us: ; preds = %for.body6.us -+ %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1, !dbg !33 -+ %exitcond48.not = icmp eq i64 %indvars.iv.next46, %wide.trip.count47, !dbg !34 -+ br i1 %exitcond48.not, label %for.inc21, label %for.cond4.preheader.us, !dbg !14, !llvm.loop !35 -+ -+for.inc21: ; preds = %for.cond4.for.inc18_crit_edge.us, %for.cond1.preheader -+ %indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1, !dbg !37 -+ %exitcond52.not = icmp eq i64 %indvars.iv.next50, %wide.trip.count51, !dbg !12 -+ br i1 %exitcond52.not, label %for.end23, label %for.cond1.preheader, !dbg !13, !llvm.loop !38 -+ -+for.end23: ; preds = %for.inc21, %entry -+ ret void, !dbg !40 -+} -+ -+attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} -+!llvm.ident = !{!9} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "loop-nest.c", directory: "/home/m00629332/code/autoTuner") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 1, !"branch-target-enforcement", i32 0} -+!6 = !{i32 1, !"sign-return-address", i32 0} -+!7 = !{i32 1, !"sign-return-address-all", i32 0} -+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)"} -+!10 = distinct !DISubprogram(name: "loop_nest", scope: !1, file: !1, line: 1, type: !11, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!11 = !DISubroutineType(types: !2) -+!12 = !DILocation(line: 8, column: 17, scope: !10) -+!13 = !DILocation(line: 8, column: 3, scope: !10) -+!14 = !DILocation(line: 9, column: 5, scope: !10) -+!15 = !{!16, !16, i64 0} -+!16 = !{!"any pointer", !17, i64 0} -+!17 = !{!"omnipotent char", !18, i64 0} -+!18 = !{!"Simple C/C++ TBAA"} -+!19 = !DILocation(line: 10, column: 8, scope: !10) -+!20 = !DILocation(line: 11, column: 23, scope: !10) -+!21 = !{!22, !22, i64 0} -+!22 = !{!"int", !17, i64 0} -+!23 = !DILocation(line: 11, column: 21, scope: !10) -+!24 = !DILocation(line: 11, column: 33, scope: !10) -+!25 = !DILocation(line: 11, column: 31, scope: !10) -+!26 = !DILocation(line: 11, column: 4, scope: !10) -+!27 = !DILocation(line: 11, column: 12, scope: !10) -+!28 = !DILocation(line: 10, column: 29, scope: !10) -+!29 = !DILocation(line: 10, column: 22, scope: !10) -+!30 = distinct !{!30, !19, !31, !32} -+!31 = !DILocation(line: 11, column: 39, scope: !10) -+!32 = !{!"llvm.loop.mustprogress"} -+!33 = !DILocation(line: 9, column: 26, scope: !10) -+!34 = !DILocation(line: 9, column: 19, scope: !10) -+!35 = distinct !{!35, !14, !36, !32} -+!36 = !DILocation(line: 12, column: 5, scope: !10) -+!37 = !DILocation(line: 8, column: 24, scope: !10) -+!38 = distinct !{!38, !13, !39, !32} -+!39 = !DILocation(line: 13, column: 3, scope: !10) -+!40 = !DILocation(line: 15, column: 1, scope: !10) -+ -+; HASH_MATCHING_ENABLED: 1 -+; HASH_MATCHING_DISABLED: 3 -diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll -new file mode 100644 -index 000000000000..f3839a49b20e ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll -@@ -0,0 +1,53 @@ -+; NOTE: This file is used to test when UnrollCount = 1 and when the compiler -+; sees that Loop Peeling is beneficial and possible, then we do Loop Peeling. -+; RUN: rm %t.unroll1.yaml -rf -+; RUN: sed 's#\[number\]#1#g;' %S/Inputs/loop_peel.yaml > %t.unroll1.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-code-region-matching-hash=false \ -+; RUN: -auto-tuning-input=%t.unroll1.yaml | FileCheck %s -+ -+; RUN: rm %t.unroll0.yaml -rf -+; RUN: sed 's#\[number\]#0#g;' %S/Inputs/loop_peel.yaml > %t.unroll0.yaml -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-code-region-matching-hash=false \ -+; RUN: -auto-tuning-input=%t.unroll0.yaml | FileCheck %s --check-prefix=DISABLE -+ -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ -+; RUN: -auto-tuning-code-region-matching-hash=false \ -+; RUN: -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop --disable-output -+; RUN: FileCheck %s --input-file %t.unroll_opp/loop_peel.ll.yaml -check-prefix=TEST-1 -+ -+define i32 @invariant_backedge_1(i32 %a, i32 %b) { -+; CHECK-LABEL: @invariant_backedge_1 -+; CHECK-NOT: %plus = phi -+; CHECK: loop.peel: -+; CHECK: loop: -+; CHECK: %i = phi -+; CHECK: %sum = phi -+; DISABLE-LABEL: @invariant_backedge_1 -+; DISABLE-NOT: loop.peel: -+entry: -+ br label %loop -+ -+loop: -+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ] -+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] -+ %plus = phi i32 [ %a, %entry ], [ %b, %loop ] -+ -+ %incsum = add i32 %sum, %plus -+ %inc = add i32 %i, 1 -+ %cmp = icmp slt i32 %i, 1000 -+ -+ br i1 %cmp, label %loop, label %exit -+ -+exit: -+ ret i32 %sum -+} -+ -+; Check for dynamic values when UnrollCount is set to 1: -+; TEST-1: Pass: loop-unroll -+; TEST-1-NEXT: Name: loop -+; TEST-1-NEXT: Function: invariant_backedge_1 -+; TEST-1-NEXT: CodeRegionType: loop -+; TEST-1-NEXT: CodeRegionHash: {{[0-9]+}} -+; TEST-1-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 2 ] } -diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll -new file mode 100644 -index 000000000000..843b8e28f3d8 ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll -@@ -0,0 +1,129 @@ -+; RUN: rm %t.unroll_opp -rf -+; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ -+; RUN: -passes='require,loop(loop-unroll-full)' --disable-output -+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 -+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 -+ -+; RUN: rm %t.unroll_opp -rf -+; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ -+; RUN: -passes='require,function(loop-unroll)' --disable-output -+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 -+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 -+ -+; This function contains two loops. loop for.body is defined with a pragma -+; unroll_count(4) and loop for.body9 is without a pragama. AutoTuner will only -+; consider for.body9 as a tuning opportunity. -+ -+; ModuleID = 'loop-unroll.c' -+source_filename = "loop-unroll.c" -+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -+target triple = "aarch64-unknown-linux-gnu" -+ -+; Function Attrs: nofree norecurse nounwind uwtable -+define dso_local void @loop(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32* noalias nocapture %d, i32 %len) local_unnamed_addr #0 !dbg !10 { -+entry: -+ %cmp34 = icmp slt i32 0, %len, !dbg !12 -+ br i1 %cmp34, label %for.body.lr.ph, label %for.cond6.preheader, !dbg !13 -+ -+for.body.lr.ph: ; preds = %entry -+ br label %for.body, !dbg !13 -+ -+for.cond.for.cond6.preheader_crit_edge: ; preds = %for.body -+ br label %for.cond6.preheader, !dbg !13 -+ -+for.cond6.preheader: ; preds = %for.cond.for.cond6.preheader_crit_edge, %entry -+ %cmp732 = icmp slt i32 0, %len, !dbg !14 -+ br i1 %cmp732, label %for.body9.lr.ph, label %for.cond.cleanup8, !dbg !15 -+ -+for.body9.lr.ph: ; preds = %for.cond6.preheader -+ br label %for.body9, !dbg !15 -+ -+for.body: ; preds = %for.body.lr.ph, %for.body -+ %i.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] -+ %idxprom = zext i32 %i.035 to i64, !dbg !16 -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom, !dbg !16 -+ %0 = load i32, i32* %arrayidx, align 4, !dbg !16, !tbaa !17 -+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom, !dbg !21 -+ %1 = load i32, i32* %arrayidx2, align 4, !dbg !21, !tbaa !17 -+ %add = add nsw i32 %1, %0, !dbg !22 -+ %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %idxprom, !dbg !23 -+ store i32 %add, i32* %arrayidx4, align 4, !dbg !24, !tbaa !17 -+ %inc = add nuw nsw i32 %i.035, 1, !dbg !25 -+ %cmp = icmp slt i32 %inc, %len, !dbg !12 -+ br i1 %cmp, label %for.body, label %for.cond.for.cond6.preheader_crit_edge, !dbg !13, !llvm.loop !26 -+ -+for.cond6.for.cond.cleanup8_crit_edge: ; preds = %for.body9 -+ br label %for.cond.cleanup8, !dbg !15 -+ -+for.cond.cleanup8: ; preds = %for.cond6.for.cond.cleanup8_crit_edge, %for.cond6.preheader -+ ret void, !dbg !30 -+ -+for.body9: ; preds = %for.body9.lr.ph, %for.body9 -+ %i5.033 = phi i32 [ 0, %for.body9.lr.ph ], [ %inc17, %for.body9 ] -+ %idxprom10 = zext i32 %i5.033 to i64, !dbg !31 -+ %arrayidx11 = getelementptr inbounds i32, i32* %a, i64 %idxprom10, !dbg !31 -+ %2 = load i32, i32* %arrayidx11, align 4, !dbg !31, !tbaa !17 -+ %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 %idxprom10, !dbg !32 -+ %3 = load i32, i32* %arrayidx13, align 4, !dbg !32, !tbaa !17 -+ %mul = mul nsw i32 %3, %2, !dbg !33 -+ %arrayidx15 = getelementptr inbounds i32, i32* %d, i64 %idxprom10, !dbg !34 -+ store i32 %mul, i32* %arrayidx15, align 4, !dbg !35, !tbaa !17 -+ %inc17 = add nuw nsw i32 %i5.033, 1, !dbg !36 -+ %cmp7 = icmp slt i32 %inc17, %len, !dbg !14 -+ br i1 %cmp7, label %for.body9, label %for.cond6.for.cond.cleanup8_crit_edge, !dbg !15, !llvm.loop !37 -+} -+ -+attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} -+!llvm.ident = !{!9} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) -+!1 = !DIFile(filename: "loop-unroll.c", directory: "/home/AutoTuner/") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !{i32 1, !"wchar_size", i32 4} -+!5 = !{i32 1, !"branch-target-enforcement", i32 0} -+!6 = !{i32 1, !"sign-return-address", i32 0} -+!7 = !{i32 1, !"sign-return-address-all", i32 0} -+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -+!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)"} -+!10 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -+!11 = !DISubroutineType(types: !2) -+!12 = !DILocation(line: 3, column: 20, scope: !10) -+!13 = !DILocation(line: 3, column: 5, scope: !10) -+!14 = !DILocation(line: 7, column: 20, scope: !10) -+!15 = !DILocation(line: 7, column: 5, scope: !10) -+!16 = !DILocation(line: 4, column: 16, scope: !10) -+!17 = !{!18, !18, i64 0} -+!18 = !{!"int", !19, i64 0} -+!19 = !{!"omnipotent char", !20, i64 0} -+!20 = !{!"Simple C/C++ TBAA"} -+!21 = !DILocation(line: 4, column: 23, scope: !10) -+!22 = !DILocation(line: 4, column: 21, scope: !10) -+!23 = !DILocation(line: 4, column: 9, scope: !10) -+!24 = !DILocation(line: 4, column: 14, scope: !10) -+!25 = !DILocation(line: 3, column: 28, scope: !10) -+!26 = distinct !{!26, !13, !27, !28, !29} -+!27 = !DILocation(line: 5, column: 5, scope: !10) -+!28 = !{!"llvm.loop.mustprogress"} -+!29 = !{!"llvm.loop.unroll.count", i32 4} -+!30 = !DILocation(line: 10, column: 1, scope: !10) -+!31 = !DILocation(line: 8, column: 16, scope: !10) -+!32 = !DILocation(line: 8, column: 23, scope: !10) -+!33 = !DILocation(line: 8, column: 21, scope: !10) -+!34 = !DILocation(line: 8, column: 9, scope: !10) -+!35 = !DILocation(line: 8, column: 14, scope: !10) -+!36 = !DILocation(line: 7, column: 28, scope: !10) -+!37 = distinct !{!37, !15, !38, !28} -+!38 = !DILocation(line: 9, column: 5, scope: !10) -+ -+ -+; TEST-1: Pass: loop-unroll -+; TEST-1-NOT: Pass: loop-unroll -+ -+; TEST-2: Name: for.body9 -+; TEST-2-NEXT: DebugLoc: { File: loop-unroll.c, Line: 7, Column: 5 } -+; TEST-2-NEXT: Function: loop -+; TEST-2-NEXT: CodeRegionType: loop -diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll.ll b/llvm/test/AutoTuning/LoopUnroll/unroll.ll -new file mode 100644 -index 000000000000..ba5c89fffaff ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/unroll.ll -@@ -0,0 +1,101 @@ -+; RUN: opt %s -S -passes=loop-unroll | FileCheck %s -check-prefix=DISABLE -+ -+; RUN: rm %t.unroll0.yaml -rf -+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll0.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ -+; RUN: -auto-tuning-code-region-matching-hash=false | \ -+; RUN: FileCheck %s -check-prefix=UNROLL0 -+ -+; RUN: rm %t.unroll0.yaml -rf -+; RUN: sed 's#\[number\]#0#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll0.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ -+; RUN: -auto-tuning-omit-metadata | \ -+; RUN: FileCheck %s -check-prefix=UNROLL0 -+ -+; RUN: rm %t.result1 %t.unroll1.yaml -rf -+; RUN: sed 's#\[number\]#1#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll1.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml | \ -+; RUN: FileCheck %s -check-prefix=UNROLL1 -+ -+; RUN: rm %t.result1 %t.unroll1.yaml -rf -+; RUN: sed 's#\[number\]#1#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll1.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml \ -+; RUN: -auto-tuning-omit-metadata | \ -+; RUN: FileCheck %s -check-prefix=UNROLL1 -+ -+; RUN: rm %t.result4 %t.unroll4.yaml -rf -+; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml | \ -+; RUN: FileCheck %s -check-prefix=UNROLL4 -+ -+; RUN: rm %t.result4 %t.unroll4.yaml -rf -+; RUN: sed 's#\[number\]#4#g; s#\[hash\]#14791762861362113823#g' \ -+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll4.yaml -+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml \ -+; RUN: -auto-tuning-omit-metadata | \ -+; RUN: FileCheck %s -check-prefix=UNROLL4 -+ -+; UNSUPPORTED: windows -+ -+define void @foo(i32* nocapture %a) { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv -+ %0 = load i32, i32* %arrayidx, align 4 -+ %inc = add nsw i32 %0, 1 -+ store i32 %inc, i32* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -+ %exitcond = icmp eq i64 %indvars.iv.next, 64 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret void -+} -+ -+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled -+; -+; DISABLE-LABEL: @foo( -+; DISABLE: store i32 -+; DISABLE-NOT: store i32 -+; DISABLE: br i1 -+; DISABLE-NOT: llvm.loop.unroll.disable -+ -+ -+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled -+; when unroll count explicitly set to be 0. -+; -+; UNROLL0-LABEL: @foo( -+; UNROLL0: store i32 -+; UNROLL0-NOT: store i32 -+; UNROLL0: br i1 -+; UNROLL0-NOT: llvm.loop.unroll.disable -+ -+ -+; Auto-tuning-enabled loop unrolling - Requesting UnrollCount = 1 will perform -+; Loop Peeling, and if Loop Peeling isn't possible/beneficial then Unroll Count -+; is unchanged. -+; -+; UNROLL1-LABEL: @foo( -+; UNROLL1: store i32 -+; UNROLL1-NOT: store i32 -+; UNROLL1: br i1 -+; UNROLL1: llvm.loop.unroll.disable -+ -+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 -+; when explicitly requested. -+; -+; UNROLL4-LABEL: @foo( -+; UNROLL4: store i32 -+; UNROLL4: store i32 -+; UNROLL4: store i32 -+; UNROLL4: store i32 -+; UNROLL4: br i1 -+; UNROLL4: llvm.loop.unroll.disable -diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll -new file mode 100644 -index 000000000000..480ccad640ae ---- /dev/null -+++ b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll -@@ -0,0 +1,113 @@ -+; Test loop unrolling using auto-tuning YAML api with IRs generated when ASSERTION=OFF -+; The IRs generated when ASSERTION=OFF usually only use slot numbers as variable names. -+ -+; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' | \ -+; RUN: FileCheck %s -check-prefix=DISABLE -+ -+; RUN: rm %t.result1_raw %t.unroll1_raw.yaml -rf -+; RUN: sed 's#\[number\]#1#g; s#\[hash\]#18159364858606519094#g' \ -+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll1_raw.yaml -+; RUN: opt %s -S -passes='require,function(loop-unroll)' \ -+; RUN: -auto-tuning-input=%t.unroll1_raw.yaml | FileCheck %s -check-prefix=UNROLL1 -+ -+; RUN: rm %t.result2_raw %t.unroll2_raw.yaml -rf -+; RUN: sed 's#\[number\]#2#g; s#\[hash\]#18159364858606519094#g' \ -+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll2_raw.yaml -+; RUN: opt %s -S -passes='require,function(loop-unroll)' \ -+; RUN: -auto-tuning-input=%t.unroll2_raw.yaml | FileCheck %s -check-prefix=UNROLL2 -+ -+; RUN: rm %t.result4_raw %t.unroll4_raw.yaml -rf -+; RUN: sed 's#\[number\]#4#g; s#\[hash\]#18159364858606519094#g' \ -+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll4_raw.yaml -+; RUN: opt %s -S -passes='require,function(loop-unroll)' \ -+; RUN: -auto-tuning-input=%t.unroll4_raw.yaml | FileCheck %s -check-prefix=UNROLL4 -+ -+; UNSUPPORTED: windows -+ -+; ModuleID = 't.ll' -+source_filename = "t.ll" -+ -+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 -+ -+define void @test(i32*) { -+ %2 = alloca i32*, align 8 -+ store i32* %0, i32** %2, align 8 -+ %3 = load i32*, i32** %2, align 8 -+ %4 = load i32, i32* %3, align 4 -+ %5 = add nsw i32 %4, 2 -+ %6 = load i32*, i32** %2, align 8 -+ store i32 %5, i32* %6, align 4 -+ ret void -+} -+ -+define i32 @main() { -+ %1 = alloca i32, align 4 -+ %2 = alloca i32, align 4 -+ store i32 0, i32* %1, align 4 -+ store i32 8, i32* %2, align 4 -+ %3 = load i32, i32* %2, align 4 -+ %4 = icmp sle i32 %3, 88 -+ br i1 %4, label %.lr.ph, label %13 -+ -+.lr.ph: ; preds = %0 -+ br label %5 -+ -+;