diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4b1c89039d908aea537083889d3eeca26c5652d4..f79e9ef3fa10a963b652222cd951c13efb54559c 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -778,7 +778,8 @@ def XRayLogArgs : InheritableAttr { def PatchableFunctionEntry : InheritableAttr, TargetSpecificAttr> { + ["aarch64", "aarch64_be", "loongarch32", "loongarch64", "riscv32", + "riscv64", "x86", "x86_64"]>> { let Spellings = [GCC<"patchable_function_entry">]; let Subjects = SubjectList<[Function, ObjCMethod]>; let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 4604c6a0161b72ce5e487332b4c0a6b8b4f10cf7..74e76f42e04c95d00d0022d6fd4f778cb1271f51 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5231,7 +5231,7 @@ takes precedence over the command line option ``-fpatchable-function-entry=N,M`` ``M`` defaults to 0 if omitted. This attribute is only supported on -aarch64/aarch64-be/riscv32/riscv64/i386/x86-64 targets. +aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64 targets. }]; } diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def new file mode 100644 index 0000000000000000000000000000000000000000..7f2c8403410dd3ef491148c89c70c082ffc6b4df --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArch.def @@ -0,0 +1,61 @@ +//==- BuiltinsLoongArch.def - LoongArch Builtin function database -- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +#if defined(BUILTIN) && !defined(TARGET_BUILTIN) +# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) +#endif + +// TODO: Support more builtins. +// TODO: Added feature constraints. +TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrrd_d, 
"ULiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") + +#undef BUILTIN +#undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 2f600d28fea0c78e6f5e0043fe47ce446d34815a..7d13fcc769c57204e8e68103d4b2affe4437c8de 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -685,4 +685,10 @@ def warn_drv_sarif_format_unstable : Warning< def err_drv_riscv_unsupported_with_linker_relaxation : Error< "%0 is unsupported with RISC-V linker relaxation (-mrelax)">; + +def warn_drv_loongarch_conflicting_implied_val : Warning< + "ignoring '%0' as it conflicts with that implied by '%1' (%2)">, + InGroup; +def err_drv_loongarch_invalid_mfpu_EQ : Error< + "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 
cb460401eb47c1a3af5bc9778325327c533a39be..a6531ecf17b313d14f090b6acdff3a51cb797f0f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11650,4 +11650,10 @@ def err_non_designated_init_used : Error< "a randomized struct can only be initialized with a designated initializer">; def err_cast_from_randomized_struct : Error< "casting from randomized structure pointer type %0 to %1">; + +// LoongArch-specific Diagnostics +def err_loongarch_builtin_requires_la64 : Error< + "this builtin requires target: loongarch64">; +def err_loongarch_builtin_requires_la32 : Error< + "this builtin requires target: loongarch32">; } // end of sema component. diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index d8ad9858d8c80c0014561900142ed6f69217ebba..d6c9d38815e30b9d4ebf33cdda29774962cd441c 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -150,6 +150,16 @@ namespace clang { }; } // namespace RISCV + /// LoongArch builtins + namespace LoongArch { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsLoongArch.def" + LastTSBuiltin + }; + } // namespace LoongArch + /// Flags to identify the types for overloaded Neon builtins. /// /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 97433f169d1434c4e6a10bbc0ce41d4d4129eeff..cf32cf2dac05f8f4ae8ee8a5657f7653de4e85b5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3499,12 +3499,14 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, HelpText<"Enable use of experimental RISC-V extensions.">; -def munaligned_access : Flag<["-"], "munaligned-access">, Group, - HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; -def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, - HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; +def munaligned_access : Flag<["-"], "munaligned-access">, Group, + HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch only)">; +def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, + HelpText<"Force all memory accesses to be aligned (AArch32/AArch64/LoongArch only)">; def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; +def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">; def mno_thumb : Flag<["-"], "mno-thumb">, Group; def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, HelpText<"Disallow generation of complex IT blocks.">; @@ -3868,8 +3870,8 @@ def mdsp : Flag<["-"], "mdsp">, Group; def mno_dsp : Flag<["-"], "mno-dsp">, Group; def mdspr2 : Flag<["-"], "mdspr2">, Group; def mno_dspr2 : Flag<["-"], "mno-dspr2">, Group; -def msingle_float : Flag<["-"], "msingle-float">, Group; -def mdouble_float : Flag<["-"], "mdouble-float">, Group; +def msingle_float : Flag<["-"], "msingle-float">, Group; +def mdouble_float : Flag<["-"], "mdouble-float">, Group; def mmadd4 : Flag<["-"], 
"mmadd4">, Group, HelpText<"Enable the generation of 4-operand madd.s, madd.d and related instructions.">; def mno_madd4 : Flag<["-"], "mno-madd4">, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 681a76dfa56a6933745d0e47098852777cf907e4..8f2800d6f2eaf18096157593406d5a9eb7bb91af 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13117,6 +13117,8 @@ private: bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); + bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index 01bce77718b357facba800065c6d7a4ea4304aea..a21e2beebf37558e985f20507a177a180fba35a4 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -42,6 +42,7 @@ module Clang_Basic { textual header "Basic/BuiltinsHexagon.def" textual header "Basic/BuiltinsHexagonDep.def" textual header "Basic/BuiltinsHexagonMapCustomDep.def" + textual header "Basic/BuiltinsLoongArch.def" textual header "Basic/BuiltinsMips.def" textual header "Basic/BuiltinsNEON.def" textual header "Basic/BuiltinsNVPTX.def" diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index ff83f996353502e5aebaf3f0d7655db41e7d7bed..92d487471621ad925e2310db76aebbc8b43b58a6 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -82,6 +82,7 @@ add_clang_library(clangBasic Targets/Hexagon.cpp Targets/Lanai.cpp Targets/Le64.cpp + Targets/LoongArch.cpp Targets/M68k.cpp Targets/MSP430.cpp Targets/Mips.cpp diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 
4a136b1c3e396f98fd20a9230dca83ed54719660..a44d829b914354ab2cb01e37a14b0f0704db235d 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -24,6 +24,7 @@ #include "Targets/Hexagon.h" #include "Targets/Lanai.h" #include "Targets/Le64.h" +#include "Targets/LoongArch.h" #include "Targets/M68k.h" #include "Targets/MSP430.h" #include "Targets/Mips.h" @@ -700,6 +701,34 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, default: return new CSKYTargetInfo(Triple, Opts); } + case llvm::Triple::loongarch32: + switch (os) { + case llvm::Triple::Linux: + // OHOS_LOCAL begin + switch (Triple.getEnvironment()) { + default: + return new LinuxTargetInfo(Triple, Opts); + case llvm::Triple::OpenHOS: + return new OHOSTargetInfo(Triple, Opts); + } + // OHOS_LOCAL end + default: + return new LoongArch32TargetInfo(Triple, Opts); + } + case llvm::Triple::loongarch64: + switch (os) { + case llvm::Triple::Linux: + // OHOS_LOCAL begin + switch (Triple.getEnvironment()) { + default: + return new LinuxTargetInfo(Triple, Opts); + case llvm::Triple::OpenHOS: + return new OHOSTargetInfo(Triple, Opts); + } + // OHOS_LOCAL end + default: + return new LoongArch64TargetInfo(Triple, Opts); + } } } } // namespace targets diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f755d8962973e0acffc0915dc16331d76ea666df --- /dev/null +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -0,0 +1,280 @@ +//===--- LoongArch.cpp - Implement LoongArch target feature support -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements LoongArch TargetInfo objects. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/MacroBuilder.h" +#include "clang/Basic/TargetBuiltins.h" +#include "llvm/Support/LoongArchTargetParser.h" +#include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::targets; + +ArrayRef LoongArchTargetInfo::getGCCRegNames() const { + static const char *const GCCRegNames[] = { + // General purpose registers. + "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", + "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", + "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", + "$r28", "$r29", "$r30", "$r31", + // Floating point registers. + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", + "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", + "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", + "$f28", "$f29", "$f30", "$f31", + // Condition flag registers. 
+ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7"}; + return llvm::makeArrayRef(GCCRegNames); +} + +ArrayRef +LoongArchTargetInfo::getGCCRegAliases() const { + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + {{"zero", "$zero", "r0"}, "$r0"}, + {{"ra", "$ra", "r1"}, "$r1"}, + {{"tp", "$tp", "r2"}, "$r2"}, + {{"sp", "$sp", "r3"}, "$r3"}, + {{"a0", "$a0", "r4"}, "$r4"}, + {{"a1", "$a1", "r5"}, "$r5"}, + {{"a2", "$a2", "r6"}, "$r6"}, + {{"a3", "$a3", "r7"}, "$r7"}, + {{"a4", "$a4", "r8"}, "$r8"}, + {{"a5", "$a5", "r9"}, "$r9"}, + {{"a6", "$a6", "r10"}, "$r10"}, + {{"a7", "$a7", "r11"}, "$r11"}, + {{"t0", "$t0", "r12"}, "$r12"}, + {{"t1", "$t1", "r13"}, "$r13"}, + {{"t2", "$t2", "r14"}, "$r14"}, + {{"t3", "$t3", "r15"}, "$r15"}, + {{"t4", "$t4", "r16"}, "$r16"}, + {{"t5", "$t5", "r17"}, "$r17"}, + {{"t6", "$t6", "r18"}, "$r18"}, + {{"t7", "$t7", "r19"}, "$r19"}, + {{"t8", "$t8", "r20"}, "$r20"}, + {{"r21"}, "$r21"}, + {{"s9", "$s9", "r22", "fp", "$fp"}, "$r22"}, + {{"s0", "$s0", "r23"}, "$r23"}, + {{"s1", "$s1", "r24"}, "$r24"}, + {{"s2", "$s2", "r25"}, "$r25"}, + {{"s3", "$s3", "r26"}, "$r26"}, + {{"s4", "$s4", "r27"}, "$r27"}, + {{"s5", "$s5", "r28"}, "$r28"}, + {{"s6", "$s6", "r29"}, "$r29"}, + {{"s7", "$s7", "r30"}, "$r30"}, + {{"s8", "$s8", "r31"}, "$r31"}, + {{"$fa0"}, "$f0"}, + {{"$fa1"}, "$f1"}, + {{"$fa2"}, "$f2"}, + {{"$fa3"}, "$f3"}, + {{"$fa4"}, "$f4"}, + {{"$fa5"}, "$f5"}, + {{"$fa6"}, "$f6"}, + {{"$fa7"}, "$f7"}, + {{"$ft0"}, "$f8"}, + {{"$ft1"}, "$f9"}, + {{"$ft2"}, "$f10"}, + {{"$ft3"}, "$f11"}, + {{"$ft4"}, "$f12"}, + {{"$ft5"}, "$f13"}, + {{"$ft6"}, "$f14"}, + {{"$ft7"}, "$f15"}, + {{"$ft8"}, "$f16"}, + {{"$ft9"}, "$f17"}, + {{"$ft10"}, "$f18"}, + {{"$ft11"}, "$f19"}, + {{"$ft12"}, "$f20"}, + {{"$ft13"}, "$f21"}, + {{"$ft14"}, "$f22"}, + {{"$ft15"}, "$f23"}, + {{"$fs0"}, "$f24"}, + {{"$fs1"}, "$f25"}, + {{"$fs2"}, "$f26"}, + {{"$fs3"}, "$f27"}, + {{"$fs4"}, "$f28"}, + {{"$fs5"}, "$f29"}, + {{"$fs6"}, "$f30"}, + 
{{"$fs7"}, "$f31"}, + }; + return llvm::makeArrayRef(GCCRegAliases); +} + +bool LoongArchTargetInfo::validateAsmConstraint( + const char *&Name, TargetInfo::ConstraintInfo &Info) const { + // See the GCC definitions here: + // https://gcc.gnu.org/onlinedocs/gccint/Machine-Constraints.html + // Note that the 'm' constraint is handled in TargetInfo. + switch (*Name) { + default: + return false; + case 'f': + // A floating-point register (if available). + Info.setAllowsRegister(); + return true; + case 'k': + // A memory operand whose address is formed by a base register and + // (optionally scaled) index register. + Info.setAllowsMemory(); + return true; + case 'l': + // A signed 16-bit constant. + Info.setRequiresImmediate(-32768, 32767); + return true; + case 'I': + // A signed 12-bit constant (for arithmetic instructions). + Info.setRequiresImmediate(-2048, 2047); + return true; + case 'J': + // Integer zero. + Info.setRequiresImmediate(0); + return true; + case 'K': + // An unsigned 12-bit constant (for logic instructions). + Info.setRequiresImmediate(0, 4095); + return true; + case 'Z': + // ZB: An address that is held in a general-purpose register. The offset is + // zero. + // ZC: A memory operand whose address is formed by a base register + // and offset that is suitable for use in instructions with the same + // addressing mode as ll.w and sc.w. + if (Name[1] == 'C' || Name[1] == 'B') { + Info.setAllowsMemory(); + ++Name; // Skip over 'Z'. + return true; + } + return false; + } +} + +std::string +LoongArchTargetInfo::convertConstraint(const char *&Constraint) const { + std::string R; + switch (*Constraint) { + case 'Z': + // "ZC"/"ZB" are two-character constraints; add "^" hint for later + // parsing. 
+ R = "^" + std::string(Constraint, 2); + ++Constraint; + break; + default: + R = TargetInfo::convertConstraint(Constraint); + break; + } + return R; +} + +void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + Builder.defineMacro("__loongarch__"); + unsigned GRLen = getRegisterWidth(); + Builder.defineMacro("__loongarch_grlen", Twine(GRLen)); + if (GRLen == 64) + Builder.defineMacro("__loongarch64"); + + if (HasFeatureD) + Builder.defineMacro("__loongarch_frlen", "64"); + else if (HasFeatureF) + Builder.defineMacro("__loongarch_frlen", "32"); + else + Builder.defineMacro("__loongarch_frlen", "0"); + + // Define __loongarch_arch. + StringRef ArchName = getCPU(); + Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); + + // Define __loongarch_tune. + StringRef TuneCPU = getTargetOpts().TuneCPU; + if (TuneCPU.empty()) + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") + Builder.defineMacro("__loongarch_lp64"); + + if (ABI == "lp64d" || ABI == "ilp32d") { + Builder.defineMacro("__loongarch_hard_float"); + Builder.defineMacro("__loongarch_double_float"); + } else if (ABI == "lp64f" || ABI == "ilp32f") { + Builder.defineMacro("__loongarch_hard_float"); + Builder.defineMacro("__loongarch_single_float"); + } else if (ABI == "lp64s" || ABI == "ilp32s") { + Builder.defineMacro("__loongarch_soft_float"); + } + + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); + if (GRLen == 64) + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); +} + +static constexpr Builtin::Info BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, +#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ + {#ID, 
TYPE, ATTRS, FEATURE, ALL_LANGUAGES, nullptr}, +#include "clang/Basic/BuiltinsLoongArch.def" +}; + +bool LoongArchTargetInfo::initFeatureMap( + llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, + const std::vector &FeaturesVec) const { + if (getTriple().getArch() == llvm::Triple::loongarch64) + Features["64bit"] = true; + if (getTriple().getArch() == llvm::Triple::loongarch32) + Features["32bit"] = true; + + return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); +} + +/// Return true if has this feature. +bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { + bool Is64Bit = getTriple().getArch() == llvm::Triple::loongarch64; + // TODO: Handle more features. + return llvm::StringSwitch(Feature) + .Case("loongarch32", !Is64Bit) + .Case("loongarch64", Is64Bit) + .Case("32bit", !Is64Bit) + .Case("64bit", Is64Bit) + .Default(false); +} + +ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { + return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - + Builtin::FirstTSBuiltin); +} + +bool LoongArchTargetInfo::handleTargetFeatures( + std::vector &Features, DiagnosticsEngine &Diags) { + for (const auto &Feature : Features) { + if (Feature == "+d" || Feature == "+f") { + // "d" implies "f". 
+ HasFeatureF = true; + if (Feature == "+d") { + HasFeatureD = true; + } + } + } + return true; +} + +bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { + return llvm::LoongArch::isValidCPUName(Name); +} + +void LoongArchTargetInfo::fillValidCPUList( + SmallVectorImpl &Values) const { + llvm::LoongArch::fillValidCPUList(Values); +} diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h new file mode 100644 index 0000000000000000000000000000000000000000..46ce56b4382230650627169981a407174281944d --- /dev/null +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -0,0 +1,149 @@ +//===-- LoongArch.h - Declare LoongArch target feature support --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares LoongArch TargetInfo objects. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Compiler.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { +protected: + std::string ABI; + std::string CPU; + bool HasFeatureD; + bool HasFeatureF; + +public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + HasFeatureD = false; + HasFeatureF = false; + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); + SuitableAlign = 128; + WCharType = SignedInt; + WIntType = UnsignedInt; + } + + bool setCPU(const std::string &Name) override { + if (!isValidCPUName(Name)) + return false; + CPU = Name; + return true; + } + + StringRef getCPU() const { return CPU; } + + StringRef getABI() const override { return ABI; } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + ArrayRef getTargetBuiltins() const override; + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::VoidPtrBuiltinVaList; + } + + const char *getClobbers() const override { return ""; } + + ArrayRef getGCCRegNames() const override; + + int getEHDataRegisterNumber(unsigned RegNo) const override { + if (RegNo == 0) + return 4; + if (RegNo == 1) + return 5; + return -1; + } + + ArrayRef getGCCRegAliases() const override; + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override; + std::string convertConstraint(const char *&Constraint) const override; + + bool hasBitIntType() const override { return true; } + + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override; + + bool + 
initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override; + + bool hasFeature(StringRef Feature) const override; + + bool isValidCPUName(StringRef Name) const override; + void fillValidCPUList(SmallVectorImpl &Values) const override; +}; + +class LLVM_LIBRARY_VISIBILITY LoongArch32TargetInfo + : public LoongArchTargetInfo { +public: + LoongArch32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : LoongArchTargetInfo(Triple, Opts) { + IntPtrType = SignedInt; + PtrDiffType = SignedInt; + SizeType = UnsignedInt; + resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128"); + // TODO: select appropriate ABI. + setABI("ilp32d"); + } + + bool setABI(const std::string &Name) override { + if (Name == "ilp32d" || Name == "ilp32f" || Name == "ilp32s") { + ABI = Name; + return true; + } + return false; + } + void setMaxAtomicWidth() override { + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; + } +}; + +class LLVM_LIBRARY_VISIBILITY LoongArch64TargetInfo + : public LoongArchTargetInfo { +public: + LoongArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : LoongArchTargetInfo(Triple, Opts) { + LongWidth = LongAlign = PointerWidth = PointerAlign = 64; + IntMaxType = Int64Type = SignedLong; + resetDataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128"); + // TODO: select appropriate ABI. 
+ setABI("lp64d"); + } + + bool setABI(const std::string &Name) override { + if (Name == "lp64d" || Name == "lp64f" || Name == "lp64s") { + ABI = Name; + return true; + } + return false; + } + void setMaxAtomicWidth() override { + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; + } +}; +} // end namespace targets +} // end namespace clang + +#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 113c629bf9edca945951f5f88c83cd874a256ea7..2de87b4a840da90c638ff77f565d3bb0858d4b84 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/IntrinsicsR600.h" @@ -5433,6 +5434,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -19407,3 +19411,129 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } + +Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + SmallVector Ops; + + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID."); + case LoongArch::BI__builtin_loongarch_cacop_d: + ID = Intrinsic::loongarch_cacop_d; + break; + case LoongArch::BI__builtin_loongarch_cacop_w: + ID = 
Intrinsic::loongarch_cacop_w; + break; + case LoongArch::BI__builtin_loongarch_dbar: + ID = Intrinsic::loongarch_dbar; + break; + case LoongArch::BI__builtin_loongarch_break: + ID = Intrinsic::loongarch_break; + break; + case LoongArch::BI__builtin_loongarch_ibar: + ID = Intrinsic::loongarch_ibar; + break; + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + ID = Intrinsic::loongarch_movfcsr2gr; + break; + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + ID = Intrinsic::loongarch_movgr2fcsr; + break; + case LoongArch::BI__builtin_loongarch_syscall: + ID = Intrinsic::loongarch_syscall; + break; + case LoongArch::BI__builtin_loongarch_crc_w_b_w: + ID = Intrinsic::loongarch_crc_w_b_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_h_w: + ID = Intrinsic::loongarch_crc_w_h_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_w_w: + ID = Intrinsic::loongarch_crc_w_w_w; + break; + case LoongArch::BI__builtin_loongarch_crc_w_d_w: + ID = Intrinsic::loongarch_crc_w_d_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_b_w: + ID = Intrinsic::loongarch_crcc_w_b_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_h_w: + ID = Intrinsic::loongarch_crcc_w_h_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_w_w: + ID = Intrinsic::loongarch_crcc_w_w_w; + break; + case LoongArch::BI__builtin_loongarch_crcc_w_d_w: + ID = Intrinsic::loongarch_crcc_w_d_w; + break; + case LoongArch::BI__builtin_loongarch_csrrd_w: + ID = Intrinsic::loongarch_csrrd_w; + break; + case LoongArch::BI__builtin_loongarch_csrwr_w: + ID = Intrinsic::loongarch_csrwr_w; + break; + case LoongArch::BI__builtin_loongarch_csrxchg_w: + ID = Intrinsic::loongarch_csrxchg_w; + break; + case LoongArch::BI__builtin_loongarch_csrrd_d: + ID = Intrinsic::loongarch_csrrd_d; + break; + case LoongArch::BI__builtin_loongarch_csrwr_d: + ID = Intrinsic::loongarch_csrwr_d; + break; + case LoongArch::BI__builtin_loongarch_csrxchg_d: + ID = Intrinsic::loongarch_csrxchg_d; + break; + case 
LoongArch::BI__builtin_loongarch_iocsrrd_b: + ID = Intrinsic::loongarch_iocsrrd_b; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_h: + ID = Intrinsic::loongarch_iocsrrd_h; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_w: + ID = Intrinsic::loongarch_iocsrrd_w; + break; + case LoongArch::BI__builtin_loongarch_iocsrrd_d: + ID = Intrinsic::loongarch_iocsrrd_d; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_b: + ID = Intrinsic::loongarch_iocsrwr_b; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_h: + ID = Intrinsic::loongarch_iocsrwr_h; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_w: + ID = Intrinsic::loongarch_iocsrwr_w; + break; + case LoongArch::BI__builtin_loongarch_iocsrwr_d: + ID = Intrinsic::loongarch_iocsrwr_d; + break; + case LoongArch::BI__builtin_loongarch_cpucfg: + ID = Intrinsic::loongarch_cpucfg; + break; + case LoongArch::BI__builtin_loongarch_asrtle_d: + ID = Intrinsic::loongarch_asrtle_d; + break; + case LoongArch::BI__builtin_loongarch_asrtgt_d: + ID = Intrinsic::loongarch_asrtgt_d; + break; + case LoongArch::BI__builtin_loongarch_lddir_d: + ID = Intrinsic::loongarch_lddir_d; + break; + case LoongArch::BI__builtin_loongarch_ldpte_d: + ID = Intrinsic::loongarch_ldpte_d; + break; + // TODO: Support more Intrinsics. 
+ } + + assert(ID != Intrinsic::not_intrinsic); + + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops); +} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 672acd844525ba423cbf13d07de5f0873941c790..f9096d05ecde83d572c4ca386b7c288d982baa23 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4252,6 +4252,7 @@ public: llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); + llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 2de773e38519ef46ba3cdb81a0f0e0be7b914d7e..dabbd516b820e14269c9fdb153445174d7f7453c 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -539,12 +539,13 @@ TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */ } -static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); +static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isEmptyField - Return true iff a the field is "empty", that is it /// is an unnamed bit-field or an (array of) empty record(s). static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, - bool AllowArrays) { + bool AllowArrays, bool AsIfNoUniqueAddr = false) { if (FD->isUnnamedBitfield()) return true; @@ -578,16 +579,19 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, // not arrays of records, so we must also check whether we stripped off an // array type above. 
if (isa(RT->getDecl()) && - (WasArray || !FD->hasAttr())) + (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr()))) return false; - return isEmptyRecord(Context, FT, AllowArrays); + return isEmptyRecord(Context, FT, AllowArrays, AsIfNoUniqueAddr); } /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not -/// considered empty. -static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { +/// considered empty. If AsIfNoUniqueAddr is true, then C++ record fields are +/// considered empty if the [[no_unique_address]] attribute would have made +/// them empty. +static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr) { const RecordType *RT = T->getAs(); if (!RT) return false; @@ -598,11 +602,11 @@ static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) - if (!isEmptyRecord(Context, I.getType(), true)) + if (!isEmptyRecord(Context, I.getType(), true, AsIfNoUniqueAddr)) return false; for (const auto *I : RD->fields()) - if (!isEmptyField(Context, I, AllowArrays)) + if (!isEmptyField(Context, I, AllowArrays, AsIfNoUniqueAddr)) return false; return true; } @@ -11641,6 +11645,447 @@ public: }; } // end anonymous namespace +// LoongArch ABI Implementation. Documented at +// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +// +//===----------------------------------------------------------------------===// + +namespace { +class LoongArchABIInfo : public DefaultABIInfo { +private: + // Size of the integer ('r') registers in bits. + unsigned GRLen; + // Size of the floating point ('f') registers in bits. + unsigned FRLen; + // Number of general-purpose argument registers. + static const int NumGARs = 8; + // Number of floating-point argument registers. 
+ static const int NumFARs = 8; + bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; + +public: + LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) + : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} + + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft, + int &FARsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + + bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; +}; +} // end anonymous namespace + +void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect, or if the type size is a scalar greater than 2*GRLen + // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct + // in LLVM IR, relying on the backend lowering code to rewrite the argument + // list and pass indirectly on LA32. 
+ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + if (!IsRetIndirect && RetTy->isScalarType() && + getContext().getTypeSize(RetTy) > (2 * GRLen)) { + if (RetTy->isComplexType() && FRLen) { + QualType EltTy = RetTy->castAs()->getElementType(); + IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen; + } else { + // This is a normal scalar > 2*GRLen, such as fp128 on LA32. + IsRetIndirect = true; + } + } + + // We must track the number of GARs and FARs used in order to conform to the + // LoongArch ABI. As GAR usage is different for variadic arguments, we must + // also track whether we are examining a vararg or not. + int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs; + int FARsLeft = FRLen ? NumFARs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + ArgInfo.info = classifyArgumentType( + ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft); + ArgNum++; + } +} + +// Returns true if the struct is a potential candidate to be passed in FARs (and +// GARs). If this function returns true, the caller is responsible for checking +// that if there is only a single field then that field is a float. +bool LoongArchABIInfo::detectFARsEligibleStructHelper( + QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > GRLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on LoongArch and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. + if (IsFloat && (Size > FRLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). 
+ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FRLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible to be + // passed via FARs in C++. + if (const auto *RTy = EltTy->getAs()) { + if (ArraySize != 0 && isa(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, + Field2Ty, Field2Off)) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. 
+ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast(B.getType()->castAs()->getDecl()); + if (!detectFARsEligibleStructHelper( + B.getType(), CurOff + Layout.getBaseClassOffset(BDecl), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + } + for (const FieldDecl *FD : RD->fields()) { + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Zero-width bitfields are ignored. + if (BitWidth == 0) + continue; + // Allow a bitfield with a type greater than GRLen as long as the + // bitwidth is GRLen or less. + if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) { + QTy = getContext().getIntTypeForBitwidth(GRLen, false); + } + } + + if (!detectFARsEligibleStructHelper( + QTy, + CurOff + getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FD->getFieldIndex())), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when +// flattened it contains a single fp value, fp+fp, or int+fp of appropriate +// size). If so, NeededFARs and NeededGARs are incremented appropriately. +bool LoongArchABIInfo::detectFARsEligibleStruct( + QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs, + int &NeededFARs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededGARs = 0; + NeededFARs = 0; + if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, + Field1Off, Field2Ty, Field2Off)) + return false; + if (!Field1Ty) + return false; + // Not really a candidate if we have a single int but no float. 
+ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field1Ty) + NeededGARs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field2Ty) + NeededGARs++; + return true; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. +ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector CoerceElts; + SmallVector UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); + CharUnits Field1End = + Field1Off + + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1End) + Padding = Field2Off - Field1End; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked), 
+ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked)); +} + +ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &GARsLeft, + int &FARsLeft) const { + assert(GARsLeft <= NumGARs && "GAR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (GARsLeft) + GARsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FARs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { + FARsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. 
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { + QualType EltTy = Ty->castAs()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FRLen) { + FARsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FRLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededGARs = 0; + int NeededFARs = 0; + bool IsCandidate = detectFARsEligibleStruct( + Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs); + if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) { + GARsLeft -= NeededGARs; + FARsLeft -= NeededFARs; + return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + // Determine the number of GARs needed to pass the current argument + // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededGARs = 1; + if (!IsFixed && NeededAlign == 2 * GRLen) + NeededGARs = 2 + (GARsLeft % 2); + else if (Size > GRLen && Size <= 2 * GRLen) + NeededGARs = 2; + + if (NeededGARs > GARsLeft) + NeededGARs = GARsLeft; + + GARsLeft -= NeededGARs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to GRLen width. 
+ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + if (const auto *EIT = Ty->getAs()) { + if (EIT->getNumBits() < GRLen) + return extendType(Ty); + if (EIT->getNumBits() > 128 || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + + return ABIArgInfo::getDirect(); + } + + // Aggregates which are <= 2*GRLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * GRLen) { + // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is + // required, and a 2-element GRLen array if only GRLen alignment is + // required. + if (Size <= GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), GRLen)); + } + if (getContext().getTypeAlign(Ty) == 2 * GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * GRLen)); + } + return ABIArgInfo::getDirect( + llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2)); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + int GARsLeft = 2; + int FARsLeft = FRLen ? 2 : 0; + return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft); +} + +Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); + + // Empty records are ignored for parameter passing purposes. 
+ if (isEmptyRecord(getContext(), Ty, true)) { + Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), + getVAListElementType(CGF), SlotSize); + Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); + return Addr; + } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*GRLen bytes are passed indirectly. + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, + /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo, + SlotSize, + /*AllowHigherAlign=*/true); +} + +ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // LA64 ABI requires unsigned 32 bit integers to be sign extended. + if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); +} + +namespace { +class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, + unsigned FRLen) + : TargetCodeGenInfo( + std::make_unique(CGT, GRLen, FRLen)) {} +}; +} // namespace + //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// @@ -11874,6 +12319,17 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { : hasFP64 ? 
64 : 32)); } + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { + StringRef ABIStr = getTarget().getABI(); + unsigned ABIFRLen = 0; + if (ABIStr.endswith("f")) + ABIFRLen = 32; + else if (ABIStr.endswith("d")) + ABIFRLen = 64; + return SetCGInfo(new LoongArchTargetCodeGenInfo( + Types, getTarget().getPointerWidth(0), ABIFRLen)); + } } } diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 00da647e4c66f6360c6d69cc4fdd3d28c2a6329e..5bbfcd0b34dfe2606c25db2ef0fe208e5df7ca6f 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -28,6 +28,7 @@ add_clang_library(clangDriver ToolChains/Arch/AArch64.cpp ToolChains/Arch/ARM.cpp ToolChains/Arch/CSKY.cpp + ToolChains/Arch/LoongArch.cpp ToolChains/Arch/M68k.cpp ToolChains/Arch/Mips.cpp ToolChains/Arch/PPC.cpp diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b7877c88fbff14a3c25333d9d95485dddd50596 --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -0,0 +1,196 @@ +//===--- LoongArch.cpp - LoongArch Helpers for Tools ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "ToolChains/CommonArgs.h" +#include "clang/Basic/DiagnosticDriver.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LoongArchTargetParser.h" + +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang; +using namespace llvm::opt; + +StringRef loongarch::getLoongArchABI(const Driver &D, const ArgList &Args, + const llvm::Triple &Triple) { + assert((Triple.getArch() == llvm::Triple::loongarch32 || + Triple.getArch() == llvm::Triple::loongarch64) && + "Unexpected triple"); + bool IsLA32 = Triple.getArch() == llvm::Triple::loongarch32; + + // Record -mabi value for later use. + const Arg *MABIArg = Args.getLastArg(options::OPT_mabi_EQ); + StringRef MABIValue; + if (MABIArg) { + MABIValue = MABIArg->getValue(); + } + + // Parse -mfpu value for later use. + const Arg *MFPUArg = Args.getLastArg(options::OPT_mfpu_EQ); + int FPU = -1; + if (MFPUArg) { + StringRef V = MFPUArg->getValue(); + if (V == "64") + FPU = 64; + else if (V == "32") + FPU = 32; + else if (V == "0" || V == "none") + FPU = 0; + else + D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << V; + } + + // Check -m*-float firstly since they have highest priority. + if (const Arg *A = Args.getLastArg(options::OPT_mdouble_float, + options::OPT_msingle_float, + options::OPT_msoft_float)) { + StringRef ImpliedABI; + int ImpliedFPU = -1; + if (A->getOption().matches(options::OPT_mdouble_float)) { + ImpliedABI = IsLA32 ? "ilp32d" : "lp64d"; + ImpliedFPU = 64; + } + if (A->getOption().matches(options::OPT_msingle_float)) { + ImpliedABI = IsLA32 ? "ilp32f" : "lp64f"; + ImpliedFPU = 32; + } + if (A->getOption().matches(options::OPT_msoft_float)) { + ImpliedABI = IsLA32 ? 
"ilp32s" : "lp64s"; + ImpliedFPU = 0; + } + + // Check `-mabi=` and `-mfpu=` settings and report if they conflict with + // the higher-priority settings implied by -m*-float. + // + // ImpliedABI and ImpliedFPU are guaranteed to have valid values because + // one of the match arms must match if execution can arrive here at all. + if (!MABIValue.empty() && ImpliedABI != MABIValue) + D.Diag(diag::warn_drv_loongarch_conflicting_implied_val) + << MABIArg->getAsString(Args) << A->getAsString(Args) << ImpliedABI; + + if (FPU != -1 && ImpliedFPU != FPU) + D.Diag(diag::warn_drv_loongarch_conflicting_implied_val) + << MFPUArg->getAsString(Args) << A->getAsString(Args) << ImpliedFPU; + + return ImpliedABI; + } + + // If `-mabi=` is specified, use it. + if (!MABIValue.empty()) + return MABIValue; + + // Select abi based on -mfpu=xx. + switch (FPU) { + case 64: + return IsLA32 ? "ilp32d" : "lp64d"; + case 32: + return IsLA32 ? "ilp32f" : "lp64f"; + case 0: + return IsLA32 ? "ilp32s" : "lp64s"; + } + + // Choose a default based on the triple. + // Honor the explicit ABI modifier suffix in triple's environment part if + // present, falling back to {ILP32,LP64}D otherwise. + switch (Triple.getEnvironment()) { + case llvm::Triple::GNUSF: + return IsLA32 ? "ilp32s" : "lp64s"; + case llvm::Triple::GNUF32: + return IsLA32 ? "ilp32f" : "lp64f"; + case llvm::Triple::GNUF64: + // This was originally permitted (and indeed the canonical way) to + // represent the {ILP32,LP64}D ABIs, but in Feb 2023 Loongson decided to + // drop the explicit suffix in favor of unmarked `-gnu` for the + // "general-purpose" ABIs, among other non-technical reasons. + // + // The spec change did not mention whether existing usages of "gnuf64" + // shall remain valid or not, so we are going to continue recognizing it + // for some time, until it is clear that everyone else has migrated away + // from it. + [[fallthrough]]; + case llvm::Triple::GNU: + default: + return IsLA32 ? 
"ilp32d" : "lp64d"; + } +} + +void loongarch::getLoongArchTargetFeatures(const Driver &D, + const llvm::Triple &Triple, + const ArgList &Args, + std::vector &Features) { + std::string ArchName; + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) + ArchName = A->getValue(); + ArchName = postProcessTargetCPUString(ArchName, Triple); + llvm::LoongArch::getArchFeatures(ArchName, Features); + + // Select floating-point features determined by -mdouble-float, + // -msingle-float, -msoft-float and -mfpu. + // Note: -m*-float wins any other options. + if (const Arg *A = Args.getLastArg(options::OPT_mdouble_float, + options::OPT_msingle_float, + options::OPT_msoft_float)) { + if (A->getOption().matches(options::OPT_mdouble_float)) { + Features.push_back("+f"); + Features.push_back("+d"); + } else if (A->getOption().matches(options::OPT_msingle_float)) { + Features.push_back("+f"); + Features.push_back("-d"); + } else /*Soft-float*/ { + Features.push_back("-f"); + Features.push_back("-d"); + } + } else if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) { + StringRef FPU = A->getValue(); + if (FPU == "64") { + Features.push_back("+f"); + Features.push_back("+d"); + } else if (FPU == "32") { + Features.push_back("+f"); + Features.push_back("-d"); + } else if (FPU == "0" || FPU == "none") { + Features.push_back("-f"); + Features.push_back("-d"); + } else { + D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU; + } + } + + // Select the `ual` feature determined by -m[no-]unaligned-access + // or the alias -m[no-]strict-align. 
+ AddTargetFeature(Args, Features, options::OPT_munaligned_access, + options::OPT_mno_unaligned_access, "ual"); +} + +std::string loongarch::postProcessTargetCPUString(const std::string &CPU, + const llvm::Triple &Triple) { + std::string CPUString = CPU; + if (CPUString == "native") { + CPUString = std::string(llvm::sys::getHostCPUName()); + if (CPUString == "generic") + CPUString = + std::string(llvm::LoongArch::getDefaultArch(Triple.isLoongArch64())); + } + if (CPUString.empty()) + CPUString = + std::string(llvm::LoongArch::getDefaultArch(Triple.isLoongArch64())); + return CPUString; +} + +std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple) { + std::string CPU; + // If we have -march, use that. + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) + CPU = A->getValue(); + return postProcessTargetCPUString(CPU, Triple); +} diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h new file mode 100644 index 0000000000000000000000000000000000000000..d8280cd836f8183dd861039086d5932bcf79046e --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h @@ -0,0 +1,37 @@ +//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H + +#include "clang/Driver/Driver.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" + +namespace clang { +namespace driver { +namespace tools { +namespace loongarch { +void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args, + std::vector &Features); + +StringRef getLoongArchABI(const Driver &D, const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); + +std::string postProcessTargetCPUString(const std::string &CPU, + const llvm::Triple &Triple); + +std::string getLoongArchTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); +} // end namespace loongarch +} // end namespace tools +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ba06b28d9661af0b84cd46a56553ce754e93e9a5..dcd86ddf1fa1a7ef7e6abb5d05ccc26cb28821b6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -11,6 +11,7 @@ #include "Arch/AArch64.h" #include "Arch/ARM.h" #include "Arch/CSKY.h" +#include "Arch/LoongArch.h" #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" @@ -48,6 +49,7 @@ #include "llvm/Support/Compression.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" +#include "llvm/Support/LoongArchTargetParser.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TargetParser.h" @@ -371,6 +373,10 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, case llvm::Triple::csky: csky::getCSKYTargetFeatures(D, Triple, Args, CmdArgs, Features); break; + 
case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); + break; } for (auto Feature : unifyTargetFeatures(Features)) { @@ -536,6 +542,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, case llvm::Triple::amdgcn: case llvm::Triple::r600: case llvm::Triple::csky: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: return !areOptimizationsEnabled(Args); default: break; @@ -1794,6 +1802,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, CmdArgs.push_back("-fallow-half-arguments-and-returns"); break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + AddLoongArchTargetArgs(Args, CmdArgs); + break; + case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -1933,6 +1946,24 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, AddUnalignedAccessWarning(CmdArgs); } +void Clang::AddLoongArchTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const llvm::Triple &Triple = getToolChain().getTriple(); + + CmdArgs.push_back("-target-abi"); + CmdArgs.push_back( + loongarch::getLoongArchABI(getToolChain().getDriver(), Args, Triple) + .data()); + + // Handle -mtune. 
+ if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { + std::string TuneCPU = A->getValue(); + TuneCPU = loongarch::postProcessTargetCPUString(TuneCPU, Triple); + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } +} + void Clang::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); @@ -6202,7 +6233,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ)) { StringRef S0 = A->getValue(), S = S0; unsigned Size, Offset = 0; - if (!Triple.isAArch64() && !Triple.isRISCV() && !Triple.isX86()) + if (!Triple.isAArch64() && !Triple.isLoongArch() && !Triple.isRISCV() && + !Triple.isX86()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; else if (S.consumeInteger(10, Size) || @@ -7877,6 +7909,14 @@ void ClangAs::AddX86TargetArgs(const ArgList &Args, } } +void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + CmdArgs.push_back("-target-abi"); + CmdArgs.push_back(loongarch::getLoongArchABI(getToolChain().getDriver(), Args, + getToolChain().getTriple()) + .data()); +} + void ClangAs::AddRISCVTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const llvm::Triple &Triple = getToolChain().getTriple(); @@ -8078,6 +8118,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, } break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + AddLoongArchTargetArgs(Args, CmdArgs); + break; + case llvm::Triple::riscv32: case llvm::Triple::riscv64: AddRISCVTargetArgs(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 5209c6687599b490f5ac357025cd305fc28d5af9..e28012af1fda09fa9bd2d95519395a0de5d6b630 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -57,6 +57,8 @@ private: bool 
KernelOrKext) const; void AddARM64TargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddPPCTargetArgs(const llvm::opt::ArgList &Args, @@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { public: ClangAs(const ToolChain &TC) : Tool("clang::as", "clang integrated assembler", TC) {} + void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddX86TargetArgs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 6e7fccf898715cfc12afd4ba97b8fe31447d5bf2..152139369ef09280af9e2d4d2f089d812efb84be 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -9,6 +9,7 @@ #include "CommonArgs.h" #include "Arch/AArch64.h" #include "Arch/ARM.h" +#include "Arch/LoongArch.h" #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" @@ -469,6 +470,10 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, case llvm::Triple::wasm32: case llvm::Triple::wasm64: return std::string(getWebAssemblyTargetCPU(Args)); + + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return loongarch::getLoongArchTargetCPU(Args, T); } } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index e320fee08547666e8ff94f9de3a45ccbd014e253..0c57b4187f00fc1d314019a5a62da141c98dd347 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -9,6 +9,7 @@ #include "Gnu.h" #include "Arch/ARM.h" #include "Arch/CSKY.h" +#include "Arch/LoongArch.h" #include "Arch/Mips.h" 
#include "Arch/PPC.h" #include "Arch/RISCV.h" @@ -278,6 +279,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { return "elf32_sparc"; case llvm::Triple::sparcv9: return "elf64_sparc"; + case llvm::Triple::loongarch32: + return "elf32loongarch"; + case llvm::Triple::loongarch64: + return "elf64loongarch"; case llvm::Triple::mips: return "elf32btsmip"; case llvm::Triple::mipsel: @@ -858,6 +863,13 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, break; } + // TODO: handle loongarch32. + case llvm::Triple::loongarch64: { + StringRef ABIName = + loongarch::getLoongArchABI(D, Args, getToolChain().getTriple()); + CmdArgs.push_back(Args.MakeArgString("-mabi=" + ABIName)); + break; + } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -2221,6 +2233,11 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "i586-suse-linux", "i686-montavista-linux", "i686-gnu", }; + static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; + static const char *const LoongArch64Triples[] = { + "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", + "loongarch64-linux-ohos"}; + static const char *const M68kLibDirs[] = {"/lib"}; static const char *const M68kTriples[] = { "m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"}; @@ -2469,6 +2486,11 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); } break; + // TODO: Handle loongarch32. 
+ case llvm::Triple::loongarch64: + LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); + TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); + break; case llvm::Triple::m68k: LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); TripleAliases.append(begin(M68kTriples), end(M68kTriples)); @@ -2856,6 +2878,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { case llvm::Triple::csky: case llvm::Triple::hexagon: case llvm::Triple::lanai: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: case llvm::Triple::m68k: case llvm::Triple::mips: case llvm::Triple::mipsel: diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index ceb1a982c3a4ceb28575f9ffb4a962d67b77254b..e4d320a2e1d5d089680e6b46f1fc333df254574c 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -8,6 +8,7 @@ #include "Linux.h" #include "Arch/ARM.h" +#include "Arch/LoongArch.h" #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" @@ -85,6 +86,39 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::aarch64_be: return "aarch64_be-linux-gnu"; + case llvm::Triple::loongarch64: { + const char *Libc; + const char *FPFlavor; + + if (TargetTriple.isGNUEnvironment()) { + Libc = "gnu"; + } else if (TargetTriple.isMusl()) { + Libc = "musl"; + } else { + return TargetTriple.str(); + } + + switch (TargetEnvironment) { + default: + return TargetTriple.str(); + case llvm::Triple::GNUSF: + FPFlavor = "sf"; + break; + case llvm::Triple::GNUF32: + FPFlavor = "f32"; + break; + case llvm::Triple::GNU: + case llvm::Triple::GNUF64: + // This was going to be "f64" in an earlier Toolchain Conventions + // revision, but starting from Feb 2023 the F64 ABI variants are + // unmarked in their canonical forms. 
+ FPFlavor = ""; + break; + } + + return (Twine("loongarch64-linux-") + Libc + FPFlavor).str(); + } + case llvm::Triple::m68k: return "m68k-linux-gnu"; @@ -473,6 +507,22 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { Loader = HF ? "ld-linux-armhf.so.3" : "ld-linux.so.3"; break; } + case llvm::Triple::loongarch32: { + LibDir = "lib32"; + Loader = + ("ld-linux-loongarch-" + + tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") + .str(); + break; + } + case llvm::Triple::loongarch64: { + LibDir = "lib64"; + Loader = + ("ld-linux-loongarch-" + + tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") + .str(); + break; + } case llvm::Triple::m68k: LibDir = "lib"; Loader = "ld.so.1"; @@ -738,6 +788,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { getTriple().getArch() == llvm::Triple::thumb || getTriple().getArch() == llvm::Triple::armeb || getTriple().getArch() == llvm::Triple::thumbeb; + const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; @@ -754,16 +805,17 @@ SanitizerMask Linux::getSupportedSanitizers() const { if (IsX86_64 || IsMIPS64 || IsAArch64) Res |= SanitizerKind::DataFlow; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || - IsRISCV64 || IsSystemZ || IsHexagon) + IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Leak; - if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || + IsLoongArch64) Res |= SanitizerKind::Thread; if (IsX86_64) Res |= SanitizerKind::KernelMemory; if (IsX86 || IsX86_64) Res |= SanitizerKind::Function; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || - IsPowerPC64 || IsHexagon) + 
IsPowerPC64 || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Scudo; if (IsX86_64 || IsAArch64) { Res |= SanitizerKind::HWAddress; diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 1bbe09030f3c38c7f1a29a5a72ea77e06132d44b..a4482e38668a5ae0546d60fd633b27fbadb1b30d 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -122,6 +122,10 @@ std::string OHOS::getMultiarchTriple(const llvm::Triple &T) const { return "x86_64-linux-ohos"; case llvm::Triple::aarch64: return "aarch64-linux-ohos"; + // OHOS_LOCAL begin + case llvm::Triple::loongarch64: + return "loongarch64-linux-ohos"; + // OHOS_LOCAL end } return T.str(); } @@ -389,7 +393,14 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { CmdArgs.push_back("-z"); CmdArgs.push_back("relro"); CmdArgs.push_back("-z"); - CmdArgs.push_back("max-page-size=4096"); + // OHOS_LOCAL begin + //LoongArch need page size 16K + if (getArch() == llvm::Triple::loongarch64) { + CmdArgs.push_back("max-page-size=16384"); + } else { + CmdArgs.push_back("max-page-size=4096"); + } + // OHOS_LOCAL end // .gnu.hash section is not compatible with the MIPS target if (getArch() != llvm::Triple::mipsel) { CmdArgs.push_back("--hash-style=gnu"); @@ -405,6 +416,7 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { SanitizerMask OHOS::getSupportedSanitizers() const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64; + const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; @@ -420,7 +432,7 @@ SanitizerMask OHOS::getSupportedSanitizers() const { // OHOS_LOCAL Res |= SanitizerKind::HWAddress; // TODO: Support TSAN and HWASAN and update mask. 
- if (IsAArch64 || IsX86_64) + if (IsAArch64 || IsX86_64 || IsLoongArch64) Res |= SanitizerKind::Thread; return Res; } diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 6e2060991b921571ef14f4782ab8b8db2884a167..105f85af6171ac67c99e1155338bf862f8164f2c 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -68,6 +68,10 @@ set(hlsl_files hlsl/hlsl_intrinsics.h ) +set(loongarch_files + larchintrin.h + ) + set(mips_msa_files msa.h ) @@ -220,6 +224,7 @@ set(files ${hexagon_files} ${hip_files} ${hlsl_files} + ${loongarch_files} ${mips_msa_files} ${opencl_files} ${ppc_files} @@ -381,6 +386,7 @@ add_dependencies("clang-resource-headers" "hexagon-resource-headers" "hip-resource-headers" "hlsl-resource-headers" + "loongarch-resource-headers" "mips-resource-headers" "ppc-resource-headers" "ppc-htm-resource-headers" @@ -404,6 +410,7 @@ add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_on add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files}") add_header_target("hexagon-resource-headers" "${hexagon_files}") add_header_target("hip-resource-headers" "${hip_files}") +add_header_target("loongarch-resource-headers" "${loongarch_files}") add_header_target("mips-resource-headers" "${mips_msa_files}") add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") @@ -494,6 +501,12 @@ install( EXCLUDE_FROM_ALL COMPONENT hip-resource-headers) +install( + FILES ${loongarch_files} + DESTINATION ${header_install_dir} + EXCLUDE_FROM_ALL + COMPONENT loongarch-resource-headers) + install( FILES ${mips_msa_files} DESTINATION ${header_install_dir} diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h new file mode 100644 index 0000000000000000000000000000000000000000..c5c533ee0b8c1d6e2372244fd8fc186e55a0409a --- /dev/null +++ b/clang/lib/Headers/larchintrin.h @@ -0,0 
+1,234 @@ +/*===------------ larchintrin.h - LoongArch intrinsics ---------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGARCH_BASE_INTRIN_H +#define _LOONGARCH_BASE_INTRIN_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct rdtime { + unsigned int value; + unsigned int timeid; +} __rdtime_t; + +#if __loongarch_grlen == 64 +typedef struct drdtime { + unsigned long dvalue; + unsigned long dtimeid; +} __drdtime_t; + +extern __inline __drdtime_t + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __rdtime_d(void) { + __drdtime_t __drdtime; + __asm__ volatile( + "rdtime.d %[val], %[tid]\n\t" + : [val] "=&r"(__drdtime.dvalue), [tid] "=&r"(__drdtime.dtimeid)); + return __drdtime; +} +#endif + +extern __inline __rdtime_t + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __rdtimeh_w(void) { + __rdtime_t __rdtime; + __asm__ volatile("rdtimeh.w %[val], %[tid]\n\t" + : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); + return __rdtime; +} + +extern __inline __rdtime_t + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __rdtimel_w(void) { + __rdtime_t __rdtime; + __asm__ volatile("rdtimel.w %[val], %[tid]\n\t" + : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); + return __rdtime; +} + +#if __loongarch_grlen == 64 +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_b_w(char _1, int _2) { + return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_h_w(short _1, int _2) { + return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); +} + +extern 
__inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_w_w(int _1, int _2) { + return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_d_w(long int _1, int _2) { + return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_b_w(char _1, int _2) { + return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_h_w(short _1, int _2) { + return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_w_w(int _1, int _2) { + return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_d_w(long int _1, int _2) { + return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); +} +#endif + +#define __break(/*ui15*/ _1) __builtin_loongarch_break((_1)) + +#if __loongarch_grlen == 32 +#define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) +#endif + +#if __loongarch_grlen == 64 +#define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) +#endif + +#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) + +#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) + +/* No trailing semicolon: __movfcsr2gr yields a value and must be usable in
   expression context; a semicolon inside the macro would also break
   `if (...) __movgr2fcsr(...); else` forms. */ +#define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr((_1)) + +#define __movgr2fcsr(/*ui5*/ _1, _2) \ + __builtin_loongarch_movgr2fcsr((_1), (unsigned int)_2) + +#define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall((_1)) + +#define 
__csrrd_w(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd_w((_1))) + +#define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ + ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) + +#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ + ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ + (unsigned int)(_2), (_3))) + +#if __loongarch_grlen == 64 +#define __csrrd_d(/*ui14*/ _1) \ + ((unsigned long int)__builtin_loongarch_csrrd_d((_1))) + +#define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ + ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ + (_2))) + +#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ + /*ui14*/ _3) \ + ((unsigned long int)__builtin_loongarch_csrxchg_d( \ + (unsigned long int)(_1), (unsigned long int)(_2), (_3))) +#endif + +extern __inline unsigned char + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_b(unsigned int _1) { + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); +} + +/* iocsrrd.h is a 16-bit IOCSR read; return unsigned short (not unsigned
   char, which would truncate the high byte) to match the cast below. */ +extern __inline unsigned short + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h(unsigned int _1) { + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +} + +extern __inline unsigned int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_w(unsigned int _1) { + return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); +} + +#if __loongarch_grlen == 64 +extern __inline unsigned long int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_d(unsigned int _1) { + return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); +} +#endif + +extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_b(unsigned char _1, unsigned int _2) { + __builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); +} + +extern __inline void + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_h(unsigned short _1, unsigned int _2) { + __builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); +} + +extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_w(unsigned int _1, unsigned int _2) { + __builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); +} + +extern __inline unsigned int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __cpucfg(unsigned int _1) { + return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); +} + +#if __loongarch_grlen == 64 +extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_d(unsigned long int _1, unsigned int _2) { + __builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); +} + +extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __asrtgt_d(long int _1, long int _2) { + __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); +} + +extern __inline void + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __asrtle_d(long int _1, long int _2) { + __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); +} +#endif + +#if __loongarch_grlen == 64 +#define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ + ((long int)__builtin_loongarch_lddir_d((long int)(_1), (_2))) + +#define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ + ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) +#endif + +#ifdef __cplusplus +} +#endif +#endif /* _LOONGARCH_BASE_INTRIN_H */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index dae51d0690e6d496e05ad02870b3482cd8f1bbf2..0d281d410c1975429181566b8f826fb5a9f0aa7e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1981,6 +1981,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::riscv32: case 
llvm::Triple::riscv64: return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); } } @@ -3629,6 +3632,91 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, return CheckHexagonBuiltinArgument(BuiltinID, TheCall); } +bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + unsigned BuiltinID, + CallExpr *TheCall) { + switch (BuiltinID) { + default: + break; + case LoongArch::BI__builtin_loongarch_cacop_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + LLVM_FALLTHROUGH; + case LoongArch::BI__builtin_loongarch_cacop_w: { + if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && + !TI.hasFeature("32bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la32) + << TheCall->getSourceRange(); + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); + SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), + llvm::maxIntN(12)); + break; + } + case LoongArch::BI__builtin_loongarch_crc_w_b_w: + case LoongArch::BI__builtin_loongarch_crc_w_h_w: + case LoongArch::BI__builtin_loongarch_crc_w_w_w: + case LoongArch::BI__builtin_loongarch_crc_w_d_w: + case LoongArch::BI__builtin_loongarch_crcc_w_b_w: + case LoongArch::BI__builtin_loongarch_crcc_w_h_w: + case LoongArch::BI__builtin_loongarch_crcc_w_w_w: + case LoongArch::BI__builtin_loongarch_crcc_w_d_w: + case LoongArch::BI__builtin_loongarch_iocsrrd_d: + case LoongArch::BI__builtin_loongarch_iocsrwr_d: + case LoongArch::BI__builtin_loongarch_asrtle_d: + case LoongArch::BI__builtin_loongarch_asrtgt_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + break; + case LoongArch::BI__builtin_loongarch_break: + case 
LoongArch::BI__builtin_loongarch_dbar: + case LoongArch::BI__builtin_loongarch_ibar: + case LoongArch::BI__builtin_loongarch_syscall: + // Check if immediate is in [0, 32767]. + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); + case LoongArch::BI__builtin_loongarch_csrrd_w: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrwr_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrxchg_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrrd_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrwr_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrxchg_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_lddir_d: + case LoongArch::BI__builtin_loongarch_ldpte_d: + if (!TI.hasFeature("64bit")) + return Diag(TheCall->getBeginLoc(), + diag::err_loongarch_builtin_requires_la64) + << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); + } + + return false; +} + bool Sema::CheckMipsBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { return CheckMipsBuiltinCpu(TI, BuiltinID, TheCall) || diff 
--git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c new file mode 100644 index 0000000000000000000000000000000000000000..fb90bf556c19b27ebb2b4f362c96b1b5c01bfe48 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ +// RUN: FileCheck --check-prefix=CHECK-C %s +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ +// RUN: FileCheck --check-prefix=CHECK-CXX %s + +// Fields containing empty structs or unions are ignored when flattening +// structs to examine whether the structs can be passed via FARs, even in C++. +// But there is an exception that non-zero-length array of empty structures are +// not ignored in C++. These rules are not documented in psABI +// but they match GCC behaviours. + +#include + +struct empty { struct { struct { } e; }; }; +struct s1 { struct empty e; float f; }; + +// CHECK-C: define{{.*}} float @test_s1(float {{.*}}) +// CHECK-CXX: define{{.*}} float @_Z7test_s12s1(float {{.*}}) +struct s1 test_s1(struct s1 a) { + return a; +} + +struct s2 { struct empty e; int32_t i; float f; }; + +// CHECK-C: define{{.*}} { i32, float } @test_s2(i32 {{.*}}, float {{.*}}) +// CHECK-CXX: define{{.*}} { i32, float } @_Z7test_s22s2(i32 {{.*}}, float {{.*}}) +struct s2 test_s2(struct s2 a) { + return a; +} + +struct s3 { struct empty e; float f; float g; }; + +// CHECK-C: define{{.*}} { float, float } @test_s3(float {{.*}}, float {{.*}}) +// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s32s3(float {{.*}}, float {{.*}}) +struct s3 test_s3(struct s3 a) { + return a; +} + +struct s4 { struct empty e; float __complex__ c; }; + +// CHECK-C: define{{.*}} { float, float } @test_s4(float {{.*}}, float {{.*}}) +// CHECK-CXX: define{{.*}} { float, float } 
@_Z7test_s42s4(float {{.*}}, float {{.*}}) +struct s4 test_s4(struct s4 a) { + return a; +} + +// An array of empty fields isn't ignored in C++ (this isn't explicit in the +// psABI, but matches observed g++ behaviour). + +struct s5 { struct empty e[1]; float f; }; + +// CHECK-C: define{{.*}} float @test_s5(float {{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z7test_s52s5(i64 {{.*}}) +struct s5 test_s5(struct s5 a) { + return a; +} + +struct empty_arr { struct { struct { } e[1]; }; }; +struct s6 { struct empty_arr e; float f; }; + +// CHECK-C: define{{.*}} float @test_s6(float {{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z7test_s62s6(i64 {{.*}}) +struct s6 test_s6(struct s6 a) { + return a; +} + +struct s7 { struct empty e[0]; float f; }; + +// CHECK-C: define{{.*}} float @test_s7(float {{.*}}) +// CHECK-CXX: define{{.*}} float @_Z7test_s72s7(float {{.*}}) +struct s7 test_s7(struct s7 a) { + return a; +} + +struct empty_arr0 { struct { struct { } e[0]; }; }; +struct s8 { struct empty_arr0 e; float f; }; + +// CHECK-C: define{{.*}} float @test_s8(float {{.*}}) +// CHECK-CXX: define{{.*}} float @_Z7test_s82s8(float {{.*}}) +struct s8 test_s8(struct s8 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s9() +// CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) +struct s9 { struct empty e; }; +struct s9 test_s9(struct s9 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c new file mode 100644 index 0000000000000000000000000000000000000000..66b480a7f068945484b3292014de43fe6db5dd23 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c @@ -0,0 +1,488 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +/// This test checks the calling convention of the lp64d ABI. + +#include +#include + +/// Part 0: C Data Types and Alignment. + +/// `char` datatype is signed by default. 
+/// In most cases, the unsigned integer data types are zero-extended when stored +/// in general-purpose register, and the signed integer data types are +/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as +/// unsigned int, are stored in general-purpose registers as proper sign +/// extensions of their 32-bit values. + +// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() +_Bool check_bool() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i8 @check_char() +char check_char() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i16 @check_short() +short check_short() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i32 @check_int() +int check_int() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_long() +long check_long() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_longlong() +long long check_longlong() { return 0; } + +// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() +unsigned char check_uchar() { return 0; } + +// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() +unsigned short check_ushort() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i32 @check_uint() +unsigned int check_uint() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_ulong() +unsigned long check_ulong() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() +unsigned long long check_ulonglong() { return 0; } + +// CHECK-LABEL: define{{.*}} float @check_float() +float check_float() { return 0; } + +// CHECK-LABEL: define{{.*}} double @check_double() +double check_double() { return 0; } + +// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() +long double check_longdouble() { return 0; } + +/// Part 1: Scalar arguments and return value. + +/// 1. 1 < WOA <= GRLEN +/// a. Argument is passed in a single argument register, or on the stack by +/// value if none is available. +/// i. If the argument is floating-point type, the argument is passed in FAR. if +/// no FAR is available, it’s passed in GAR. 
If no GAR is available, it’s +/// passed on the stack. When passed in registers or on the stack, +/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, +/// with the upper bits undefined. +/// ii. If the argument is integer or pointer type, the argument is passed in +/// GAR. If no GAR is available, it’s passed on the stack. When passed in +/// registers or on the stack, the unsigned integer scalars narrower than GRLEN +/// bits are zero-extended to GRLEN bits, and the signed integer scalars are +/// sign-extended. +/// 2. GRLEN < WOA ≤ 2 × GRLEN +/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in +/// the lower-numbered register and the high-order GRLEN bits in the +/// higher-numbered register. If exactly one register is available, the +/// low-order GRLEN bits are passed in the register and the high-order GRLEN +/// bits are passed on the stack. If no GAR is available, it’s passed on the +/// stack. + +/// Note that most of these conventions are handled by the backend, so here we +/// only check the correctness of argument (or return value)'s sign/zero +/// extension attribute. + +// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1 noundef zeroext %a, i8 noundef signext %b, i8 noundef zeroext %c, i16 noundef signext %d, i16 noundef zeroext %e, i32 noundef signext %f, i32 noundef signext %g, i64 noundef %h, i1 noundef zeroext %i, i8 noundef signext %j, i8 noundef zeroext %k, i16 noundef signext %l, i16 noundef zeroext %m, i32 noundef signext %n, i32 noundef signext %o, i64 noundef %p) +int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, + uint32_t g, int64_t h, _Bool i, int8_t j, uint8_t k, int16_t l, + uint16_t m, int32_t n, uint32_t o, int64_t p) { + return 0; +} + +/// Part 2: Structure arguments and return value. + +/// Empty structures are ignored by C compilers which support them as a +/// non-standard extension(same as union arguments and return values). 
Bits +/// unused due to padding, and bits past the end of a structure whose size in +/// bits is not divisible by GRLEN, are undefined. And the layout of the +/// structure on the stack is consistent with that in memory. + +/// Check empty structs are ignored. + +struct empty_s {}; + +// CHECK-LABEL: define{{.*}} void @f_empty_s() +struct empty_s f_empty_s(struct empty_s x) { + return x; +} + +/// 1. 0 < WOA ≤ GRLEN +/// a. The structure has only fixed-point members. If there is an available GAR, +/// the structure is passed through the GAR by value passing; If no GAR is +/// available, it’s passed on the stack. + +struct i16x4_s { + int16_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) +struct i16x4_s f_i16x4_s(struct i16x4_s x) { + return x; +} + +/// b. The structure has only floating-point members: +/// i. One floating-point member. The argument is passed in a FAR; If no FAR is +/// available, the value is passed in a GAR; if no GAR is available, the value +/// is passed on the stack. + +struct f32x1_s { + float a; +}; + +struct f64x1_s { + double a; +}; + +// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) +struct f32x1_s f_f32x1_s(struct f32x1_s x) { + return x; +} + +// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) +struct f64x1_s f_f64x1_s(struct f64x1_s x) { + return x; +} + +/// ii. Two floating-point members. The argument is passed in a pair of +/// available FAR, with the low-order float member bits in the lower-numbered +/// FAR and the high-order float member bits in the higher-numbered FAR. If the +/// number of available FAR is less than 2, it’s passed in a GAR, and passed on +/// the stack if no GAR is available. + +struct f32x2_s { + float a, b; +}; + +// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) +struct f32x2_s f_f32x2_s(struct f32x2_s x) { + return x; +} + +/// c. The structure has both fixed-point and floating-point members, i.e. 
the +/// structure has one float member and... +/// i. Multiple fixed-point members. If there are available GAR, the structure +/// is passed in a GAR, and passed on the stack if no GAR is available. + +struct f32x1_i16x2_s { + float a; + int16_t b, c; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) +struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { + return x; +} + +/// ii. Only one fixed-point member. If one FAR and one GAR are available, the +/// floating-point member of the structure is passed in the FAR, and the integer +/// member of the structure is passed in the GAR; If no floating-point register +/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s +/// passed on the stack. + +struct f32x1_i32x1_s { + float a; + int32_t b; +}; + +// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) +struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { + return x; +} + +/// 2. GRLEN < WOA ≤ 2 × GRLEN +/// a. Only fixed-point members. +/// i. The argument is passed in a pair of available GAR, with the low-order +/// bits in the lower-numbered GAR and the high-order bits in the +/// higher-numbered GAR. If only one GAR is available, the low-order bits are in +/// the GAR and the high-order bits are on the stack, and passed on the stack if +/// no GAR is available. + +struct i64x2_s { + int64_t a, b; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce) +struct i64x2_s f_i64x2_s(struct i64x2_s x) { + return x; +} + +/// b. Only floating-point members. +/// i. The structure has one long double member or one double member and two +/// adjacent float members or 3-4 float members. The argument is passed in a +/// pair of available GAR, with the low-order bits in the lower-numbered GAR and +/// the high-order bits in the higher-numbered GAR. 
If only one GAR is +/// available, the low-order bits are in the GAR and the high-order bits are on +/// the stack, and passed on the stack if no GAR is available. + +struct f128x1_s { + long double a; +}; + +// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce) +struct f128x1_s f_f128x1_s(struct f128x1_s x) { + return x; +} + +struct f64x1_f32x2_s { + double a; + float b, c; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce) +struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) { + return x; +} + +struct f32x3_s { + float a, b, c; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce) +struct f32x3_s f_f32x3_s(struct f32x3_s x) { + return x; +} + +struct f32x4_s { + float a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce) +struct f32x4_s f_f32x4_s(struct f32x4_s x) { + return x; +} + +/// ii. The structure with two double members is passed in a pair of available +/// FARs. If there is no pair of available FARs, it’s passed in GARs. A +/// structure with one double member and one float member is handled the same way. + +struct f64x2_s { + double a, b; +}; + +// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1) +struct f64x2_s f_f64x2_s(struct f64x2_s x) { + return x; +} + +/// c. Both fixed-point and floating-point members. +/// i. The structure has one double member and only one fixed-point member. +/// A. If one FAR and one GAR are available, the floating-point member of the +/// structure is passed in the FAR, and the integer member of the structure is +/// passed in the GAR; If no floating-point registers but two GARs are +/// available, it’s passed in the two GARs; If only one GAR is available, the +/// low-order bits are in the GAR and the high-order bits are on the stack; And +/// it’s passed on the stack if no GAR is available. 
+ +struct f64x1_i64x1_s { + double a; + int64_t b; +}; + +// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1) +struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) { + return x; +} + +/// ii. Others +/// A. The argument is passed in a pair of available GAR, with the low-order +/// bits in the lower-numbered GAR and the high-order bits in the +/// higher-numbered GAR. If only one GAR is available, the low-order bits are in +/// the GAR and the high-order bits are on the stack, and passed on the stack if +/// no GAR is available. + +struct f64x1_i32x2_s { + double a; + int32_t b, c; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce) +struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) { + return x; +} + +struct f32x2_i32x2_s { + float a, b; + int32_t c, d; +}; + +// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce) +struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { + return x; +} + +/// 3. WOA > 2 × GRLEN +/// a. It’s passed by reference and is replaced in the argument list with the +/// address. If there is an available GAR, the reference is passed in the GAR, +/// and passed on the stack if no GAR is available. + +struct i64x4_s { + int64_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s) align 8 %agg.result, ptr{{.*}} %x) +struct i64x4_s f_i64x4_s(struct i64x4_s x) { + return x; +} + +struct f64x4_s { + double a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s) align 8 %agg.result, ptr{{.*}} %x) +struct f64x4_s f_f64x4_s(struct f64x4_s x) { + return x; +} + +/// Part 3: Union arguments and return value. + +/// Check empty unions are ignored. + +union empty_u {}; + +// CHECK-LABEL: define{{.*}} void @f_empty_u() +union empty_u f_empty_u(union empty_u x) { + return x; +} + +/// Union is passed in GAR or stack. +/// 1. 0 < WOA ≤ GRLEN +/// a. 
The argument is passed in a GAR, or on the stack by value if no GAR is +/// available. + +union i32_f32_u { + int32_t a; + float b; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce) +union i32_f32_u f_i32_f32_u(union i32_f32_u x) { + return x; +} + +union i64_f64_u { + int64_t a; + double b; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce) +union i64_f64_u f_i64_f64_u(union i64_f64_u x) { + return x; +} + +/// 2. GRLEN < WOA ≤ 2 × GRLEN +/// a. The argument is passed in a pair of available GAR, with the low-order +/// bits in the lower-numbered GAR and the high-order bits in the +/// higher-numbered GAR. If only one GAR is available, the low-order bits are in +/// the GAR and the high-order bits are on the stack. The arguments are passed +/// on the stack when no GAR is available. + +union i128_f128_u { + __int128_t a; + long double b; +}; + +// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce) +union i128_f128_u f_i128_f128_u(union i128_f128_u x) { + return x; +} + +/// 3. WOA > 2 × GRLEN +/// a. It’s passed by reference and is replaced in the argument list with the +/// address. If there is an available GAR, the reference is passed in the GAR, +/// and passed on the stack if no GAR is available. + +union i64_arr3_u { + int64_t a[3]; +}; + +// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u) align 8 %agg.result, ptr{{.*}} %x) +union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { + return x; +} + +/// Part 4: Complex number arguments and return value. + +/// A complex floating-point number, or a structure containing just one complex +/// floating-point number, is passed as though it were a structure containing +/// two floating-point reals. 
+ +// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float noundef %x.coerce0, float noundef %x.coerce1) +float __complex__ f_floatcomplex(float __complex__ x) { return x; } + +// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double noundef %x.coerce0, double noundef %x.coerce1) +double __complex__ f_doublecomplex(double __complex__ x) { return x; } + +struct floatcomplex_s { + float __complex__ c; +}; +// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1) +struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) { + return x; +} + +struct doublecomplex_s { + double __complex__ c; +}; +// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1) +struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { + return x; +} + +/// Part 5: Variadic arguments. + +/// Variadic arguments are passed in GARs in the same manner as named arguments. + +int f_va_callee(int, ...); + +// CHECK-LABEL: define{{.*}} void @f_va_caller() +// CHECK: call signext i32 (i32, ...) 
@f_va_callee(i32 noundef signext 1, i32 noundef signext 2, i64 noundef 3, double noundef 4.000000e+00, double noundef 5.000000e+00, i64 {{.*}}, [2 x i64] {{.*}}) +void f_va_caller(void) { + f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, + (struct i64x2_s){10, 11}); +} + +// CHECK-LABEL: @f_va_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[VA:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[FMT:%.*]], ptr [[FMT_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.va_start(ptr [[VA]]) +// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[VA]], align 8 +// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8 +// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[VA]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 8 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[V]], align 4 +// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +int f_va_int(char *fmt, ...) { + __builtin_va_list va; + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + __builtin_va_end(va); + return v; +} + +/// Part 6. Structures with zero size fields (bitfields or arrays). + +/// Check that zero size fields in structure are ignored. +/// Note that this rule is not explicitly documented in ABI spec but it matches +/// GCC's behavior. 
+ +struct f64x2_zsfs_s { + double a; + int : 0; + __int128_t : 0; + int b[0]; + __int128_t c[0]; + double d; +}; + +// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_zsfs_s(double %0, double %1) +struct f64x2_zsfs_s f_f64x2_zsfs_s(struct f64x2_zsfs_s x) { + return x; +} + diff --git a/clang/test/CodeGen/LoongArch/atomics.c b/clang/test/CodeGen/LoongArch/atomics.c new file mode 100644 index 0000000000000000000000000000000000000000..edc58d30db186d7f8b028a169653f0951c2eddbc --- /dev/null +++ b/clang/test/CodeGen/LoongArch/atomics.c @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -triple loongarch32 -O1 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=LA32 +// RUN: %clang_cc1 -triple loongarch64 -O1 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=LA64 + +/// This test demonstrates that MaxAtomicInlineWidth is set appropriately. + +#include +#include + +void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) { + // LA32: load atomic i8, ptr %a seq_cst, align 1 + // LA32: store atomic i8 %b, ptr %a seq_cst, align 1 + // LA32: atomicrmw add ptr %a, i8 %b seq_cst + // LA64: load atomic i8, ptr %a seq_cst, align 1 + // LA64: store atomic i8 %b, ptr %a seq_cst, align 1 + // LA64: atomicrmw add ptr %a, i8 %b seq_cst + __c11_atomic_load(a, memory_order_seq_cst); + __c11_atomic_store(a, b, memory_order_seq_cst); + __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +} + +void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) { + // LA32: load atomic i32, ptr %a seq_cst, align 4 + // LA32: store atomic i32 %b, ptr %a seq_cst, align 4 + // LA32: atomicrmw add ptr %a, i32 %b seq_cst + // LA64: load atomic i32, ptr %a seq_cst, align 4 + // LA64: store atomic i32 %b, ptr %a seq_cst, align 4 + // LA64: atomicrmw add ptr %a, i32 %b seq_cst + __c11_atomic_load(a, memory_order_seq_cst); + __c11_atomic_store(a, b, memory_order_seq_cst); + __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +} + +void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) { + // LA32: call i64 
@__atomic_load_8 + // LA32: call void @__atomic_store_8 + // LA32: call i64 @__atomic_fetch_add_8 + // LA64: load atomic i64, ptr %a seq_cst, align 8 + // LA64: store atomic i64 %b, ptr %a seq_cst, align 8 + // LA64: atomicrmw add ptr %a, i64 %b seq_cst + __c11_atomic_load(a, memory_order_seq_cst); + __c11_atomic_store(a, b, memory_order_seq_cst); + __c11_atomic_fetch_add(a, b, memory_order_seq_cst); +} diff --git a/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c b/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c new file mode 100644 index 0000000000000000000000000000000000000000..b4430cf40e622bc656805ad6b8d26394d5116e73 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/inline-asm-constraints-error.c @@ -0,0 +1,23 @@ +// RUN: not %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s 2>&1 -o - | FileCheck %s +// RUN: not %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s 2>&1 -o - | FileCheck %s + +void test_l(void) { +// CHECK: :[[#@LINE+1]]:27: error: value '32768' out of range for constraint 'l' + asm volatile ("" :: "l"(32768)); +// CHECK: :[[#@LINE+1]]:27: error: value '-32769' out of range for constraint 'l' + asm volatile ("" :: "l"(-32769)); +} + +void test_I(void) { +// CHECK: :[[#@LINE+1]]:27: error: value '2048' out of range for constraint 'I' + asm volatile ("" :: "I"(2048)); +// CHECK: :[[#@LINE+1]]:27: error: value '-2049' out of range for constraint 'I' + asm volatile ("" :: "I"(-2049)); +} + +void test_K(void) { +// CHECK: :[[#@LINE+1]]:27: error: value '4096' out of range for constraint 'K' + asm volatile ("" :: "K"(4096)); +// CHECK: :[[#@LINE+1]]:27: error: value '-1' out of range for constraint 'K' + asm volatile ("" :: "K"(-1)); +} diff --git a/clang/test/CodeGen/LoongArch/inline-asm-constraints.c b/clang/test/CodeGen/LoongArch/inline-asm-constraints.c new file mode 100644 index 0000000000000000000000000000000000000000..b19494284bd994f386fbc379f54348d719478a5b --- /dev/null +++ 
b/clang/test/CodeGen/LoongArch/inline-asm-constraints.c @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s + +/// Test LoongArch specific inline assembly constraints. + +float f; +double d; +void test_f(void) { +// CHECK-LABEL: define{{.*}} void @test_f() +// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float, ptr @f +// CHECK: call void asm sideeffect "", "f"(float [[FLT_ARG]]) + asm volatile ("" :: "f"(f)); +// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load double, ptr @d +// CHECK: call void asm sideeffect "", "f"(double [[FLT_ARG]]) + asm volatile ("" :: "f"(d)); +} + +void test_k(int *p, int idx) { +// CHECK-LABEL: define{{.*}} void @test_k(ptr noundef %p, i32 noundef{{.*}} %idx) +// CHECK: call void asm sideeffect "", "*k"(ptr elementtype(i32) %{{.*}}) + asm volatile("" :: "k"(*(p+idx))); +} + +void test_l(void) { +// CHECK-LABEL: define{{.*}} void @test_l() +// CHECK: call void asm sideeffect "", "l"(i32 32767) + asm volatile ("" :: "l"(32767)); +// CHECK: call void asm sideeffect "", "l"(i32 -32768) + asm volatile ("" :: "l"(-32768)); +} + +void test_m(int *p) { +// CHECK-LABEL: define{{.*}} void @test_m(ptr noundef %p) +// CHECK: call void asm sideeffect "", "*m"(ptr nonnull elementtype(i32) %{{.*}}) + asm volatile("" :: "m"(*(p+4))); +} + +void test_I(void) { +// CHECK-LABEL: define{{.*}} void @test_I() +// CHECK: call void asm sideeffect "", "I"(i32 2047) + asm volatile ("" :: "I"(2047)); +// CHECK: call void asm sideeffect "", "I"(i32 -2048) + asm volatile ("" :: "I"(-2048)); +} + +void test_J(void) { +// CHECK-LABEL: define{{.*}} void @test_J() +// CHECK: call void asm sideeffect "", "J"(i32 0) + asm volatile ("" :: "J"(0)); +} + +void test_K(void) { +// CHECK-LABEL: define{{.*}} void @test_K() +// CHECK: call void asm sideeffect "", "K"(i32 4095) + asm volatile ("" :: "K"(4095)); +// CHECK: call void asm sideeffect "", "K"(i32 0) + asm 
volatile ("" :: "K"(0)); +} + +void test_ZB(int *p) { +// CHECK-LABEL: define{{.*}} void @test_ZB(ptr noundef %p) +// CHECK: call void asm sideeffect "", "*^ZB"(ptr elementtype(i32) %p) + asm volatile ("" :: "ZB"(*p)); +} + +void test_ZC(int *p) { +// CHECK-LABEL: define{{.*}} void @test_ZC(ptr noundef %p) +// CHECK: call void asm sideeffect "", "*^ZC"(ptr elementtype(i32) %p) + asm volatile ("" :: "ZC"(*p)); +} diff --git a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c new file mode 100644 index 0000000000000000000000000000000000000000..c5ecf0c929af8a4b11c11ba455ac74ffd64769e1 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs-error.c @@ -0,0 +1,20 @@ +// RUN: not %clang_cc1 -triple loongarch32 -emit-llvm %s 2>&1 -o - | FileCheck %s +// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm %s 2>&1 -o - | FileCheck %s + +void test(void) { +// CHECK: :[[#@LINE+1]]:24: error: unknown register name '$r32' in asm + register int a0 asm ("$r32"); +// CHECK: :[[#@LINE+1]]:26: error: unknown register name '$f32' in asm + register float a1 asm ("$f32"); +// CHECK: :[[#@LINE+1]]:24: error: unknown register name '$foo' in asm + register int a2 asm ("$foo"); + +/// Names not prefixed with '$' are invalid. 
+ +// CHECK: :[[#@LINE+1]]:26: error: unknown register name 'f0' in asm + register float a5 asm ("f0"); +// CHECK: :[[#@LINE+1]]:26: error: unknown register name 'fa0' in asm + register float a6 asm ("fa0"); +// CHECK: :[[#@LINE+1]]:15: error: unknown register name 'fcc0' in asm + asm ("" ::: "fcc0"); +} diff --git a/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c new file mode 100644 index 0000000000000000000000000000000000000000..e1015f6fc01d58f436e89b81c53f54f37a3e71d8 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -triple loongarch32 -emit-llvm -O2 %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s + +/// Check GCC register names and alias can be used in register variable definition. + +// CHECK-LABEL: @test_r0 +// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +void test_r0() { + register int a asm ("$r0"); + register int b asm ("r0"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_r12 +// CHECK: call void asm sideeffect "", "{$r12}"(i32 undef) +void test_r12() { + register int a asm ("$r12"); + register int b asm ("r12"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_r31 +// CHECK: call void asm sideeffect "", "{$r31}"(i32 undef) +void test_r31() { + register int a asm ("$r31"); + register int b asm ("r31"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_zero +// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +void test_zero() { + register int a asm ("$zero"); + register int b asm ("zero"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_a0 +// CHECK: call void asm sideeffect "", "{$r4}"(i32 undef) +void test_a0() { + register int a asm ("$a0"); + register int b asm ("a0"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_t1 +// CHECK: call void 
asm sideeffect "", "{$r13}"(i32 undef) +void test_t1() { + register int a asm ("$t1"); + register int b asm ("t1"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_fp +// CHECK: call void asm sideeffect "", "{$r22}"(i32 undef) +void test_fp() { + register int a asm ("$fp"); + register int b asm ("fp"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_s2 +// CHECK: call void asm sideeffect "", "{$r25}"(i32 undef) +void test_s2() { + register int a asm ("$s2"); + register int b asm ("s2"); + asm ("" :: "r" (a)); + asm ("" :: "r" (b)); +} + +// CHECK-LABEL: @test_f0 +// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +void test_f0() { + register float a asm ("$f0"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_f14 +// CHECK: call void asm sideeffect "", "{$f14}"(float undef) +void test_f14() { + register float a asm ("$f14"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_f31 +// CHECK: call void asm sideeffect "", "{$f31}"(float undef) +void test_f31() { + register float a asm ("$f31"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_fa0 +// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +void test_fa0() { + register float a asm ("$fa0"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_ft1 +// CHECK: call void asm sideeffect "", "{$f9}"(float undef) +void test_ft1() { + register float a asm ("$ft1"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_fs2 +// CHECK: call void asm sideeffect "", "{$f26}"(float undef) +void test_fs2() { + register float a asm ("$fs2"); + asm ("" :: "f" (a)); +} + +// CHECK-LABEL: @test_fcc +// CHECK: call void asm sideeffect "", "~{$fcc0}"() +// CHECK: call void asm sideeffect "", "~{$fcc7}"() +void test_fcc() { + asm ("" ::: "$fcc0"); + asm ("" ::: "$fcc7"); +} diff --git a/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c new file mode 100644 index 
0000000000000000000000000000000000000000..b36fe7a7b69bb0686f6b49d94538896958e672eb --- /dev/null +++ b/clang/test/CodeGen/LoongArch/inline-asm-operand-modifiers.c @@ -0,0 +1,25 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch32 -O2 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - | FileCheck %s + +/// Test LoongArch specific operand modifiers (i.e. operand codes). + +// CHECK-LABEL: @test_z_zero( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 0) #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: ret void +// +void test_z_zero(int a) { + int tmp; + asm volatile ("add.w %0, %1, %z2" : "=r" (tmp) : "r" (a), "ri" (0)); +} + +// CHECK-LABEL: @test_z_nonzero( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 asm sideeffect "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 [[A:%.*]], i32 1) #[[ATTR1]], !srcloc !3 +// CHECK-NEXT: ret void +// +void test_z_nonzero(int a) { + int tmp; + asm volatile ("add.w %0, %1, %z2" : "=r" (tmp) : "r" (a), "ri" (1)); +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c new file mode 100644 index 0000000000000000000000000000000000000000..7551b762e12973768026860954995cd8ddf257af --- /dev/null +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c @@ -0,0 +1,138 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null + +#include + +void cacop_d(unsigned long int a) { + __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} + __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument 
value 32 is outside the valid range [0, 31]}} + __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} + __builtin_loongarch_cacop_w(1, a, 4096); // expected-error {{argument value 4096 is outside the valid range [-2048, 2047]}} +} + +void dbar(int a) { + __builtin_loongarch_dbar(32768); // expected-error {{argument value 32768 is outside the valid range [0, 32767]}} + __builtin_loongarch_dbar(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} + __builtin_loongarch_dbar(a); // expected-error {{argument to '__builtin_loongarch_dbar' must be a constant integer}} +} + +void ibar(int a) { + __builtin_loongarch_ibar(32769); // expected-error {{argument value 32769 is outside the valid range [0, 32767]}} + __builtin_loongarch_ibar(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} + __builtin_loongarch_ibar(a); // expected-error {{argument to '__builtin_loongarch_ibar' must be a constant integer}} +} + +void loongarch_break(int a) { + __builtin_loongarch_break(32769); // expected-error {{argument value 32769 is outside the valid range [0, 32767]}} + __builtin_loongarch_break(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} + __builtin_loongarch_break(a); // expected-error {{argument to '__builtin_loongarch_break' must be a constant integer}} +} + +int movfcsr2gr_out_of_lo_range(int a) { + int b = __builtin_loongarch_movfcsr2gr(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + int c = __builtin_loongarch_movfcsr2gr(32); // expected-error {{argument value 32 is outside the valid range [0, 3]}} + int d = __builtin_loongarch_movfcsr2gr(a); // expected-error {{argument to '__builtin_loongarch_movfcsr2gr' must be a constant integer}} + return 0; +} + +void movgr2fcsr(int a, int b) { + __builtin_loongarch_movgr2fcsr(-1, b); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_loongarch_movgr2fcsr(32, b); // expected-error {{argument value 32 is outside the valid range [0, 3]}} + __builtin_loongarch_movgr2fcsr(a, b); // expected-error {{argument to '__builtin_loongarch_movgr2fcsr' must be a constant integer}} +} + +void syscall(int a) { + __builtin_loongarch_syscall(32769); // expected-error {{argument value 32769 is outside the valid range [0, 32767]}} + __builtin_loongarch_syscall(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 32767]}} + __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} +} + +int crc_w_b_w(char a, int b) { + return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crc_w_h_w(short a, int b) { + return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crc_w_w_w(int a, int b) { + return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crc_w_d_w(long int a, int b) { + return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} +int crcc_w_b_w(char a, int b) { + return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crcc_w_h_w(short a, int b) { + return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crcc_w_w_w(int a, int b) { + return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +int crcc_w_d_w(long int a, int b) { + return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +unsigned long int csrrd_d() { + return __builtin_loongarch_csrrd_d(1); // expected-error {{this 
builtin requires target: loongarch64}} +} + +unsigned long int csrwr_d(unsigned long int a) { + return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +} + +unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { + return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: loongarch64}} +} + +void csrrd_w(int a) { + __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_w(a); // expected-error {{argument to '__builtin_loongarch_csrrd_w' must be a constant integer}} +} + +void csrwr_w(unsigned int a) { + __builtin_loongarch_csrwr_w(a, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrwr_w(a, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrwr_w(a, a); // expected-error {{argument to '__builtin_loongarch_csrwr_w' must be a constant integer}} +} + +void csrxchg_w(unsigned int a, unsigned int b) { + __builtin_loongarch_csrxchg_w(a, b, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrxchg_w(a, b, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} +} + +unsigned long int iocsrrd_d(unsigned int a) { + return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} +} + +void iocsrwr_d(unsigned long int a, unsigned int b) { + __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +void asrtle_d(long int a, 
long int b) { + __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +void asrtgt_d(long int a, long int b) { + __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +} + +void lddir_d(long int a, int b) { + __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +} + +void ldpte_d(long int a, int b) { + __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +} + +void rdtime_d() { + __rdtime_d(); // expected-warning {{call to undeclared function '__rdtime_d'}} +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c new file mode 100644 index 0000000000000000000000000000000000000000..93d54f511a9cd271695ec066c483ff87f0a03c09 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -O2 -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=LA32 + +#include + +// LA32-LABEL: @dbar( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +// LA32-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +// LA32-NEXT: ret void +// +void dbar() { + __dbar(0); + __builtin_loongarch_dbar(0); +} + +// LA32-LABEL: @ibar( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +// LA32-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +// LA32-NEXT: ret void +// +void ibar() { + __ibar(0); + __builtin_loongarch_ibar(0); +} + +// LA32-LABEL: @loongarch_break( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.break(i32 1) +// LA32-NEXT: tail call void @llvm.loongarch.break(i32 1) +// LA32-NEXT: ret void +// +void loongarch_break() { + __break(1); + __builtin_loongarch_break(1); +} + +// LA32-LABEL: @syscall( +// 
LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +// LA32-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +// LA32-NEXT: ret void +// +void syscall() { + __syscall(1); + __builtin_loongarch_syscall(1); +} + +// LA32-LABEL: @csrrd_w( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +// LA32-NEXT: ret i32 0 +// +unsigned int csrrd_w() { + unsigned int a = __csrrd_w(1); + unsigned int b = __builtin_loongarch_csrrd_w(1); + return 0; +} + +// LA32-LABEL: @csrwr_w( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1) +// LA32-NEXT: ret i32 0 +// +unsigned int csrwr_w(unsigned int a) { + unsigned int b = __csrwr_w(a, 1); + unsigned int c = __builtin_loongarch_csrwr_w(a, 1); + return 0; +} + +// LA32-LABEL: @csrxchg_w( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1) +// LA32-NEXT: ret i32 0 +// +unsigned int csrxchg_w(unsigned int a, unsigned int b) { + unsigned int c = __csrxchg_w(a, b, 1); + unsigned int d = __builtin_loongarch_csrxchg_w(a, b, 1); + return 0; +} + +// LA32-LABEL: @iocsrrd_b( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]]) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]]) +// LA32-NEXT: ret i8 0 +// +unsigned char iocsrrd_b(unsigned int a) { + unsigned char b = __iocsrrd_b(a); + unsigned char c = __builtin_loongarch_iocsrrd_b(a); + return 0; +} + +// LA32-LABEL: @iocsrrd_h( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// LA32-NEXT: 
[[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// LA32-NEXT: ret i16 0 +// +unsigned short iocsrrd_h(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return 0; +} + +// LA32-LABEL: @iocsrrd_w( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A:%.*]]) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]]) +// LA32-NEXT: ret i32 0 +// +unsigned int iocsrrd_w(unsigned int a) { + unsigned int b = __iocsrrd_w(a); + unsigned int c = __builtin_loongarch_iocsrrd_w(a); + return 0; +} + +// LA32-LABEL: @iocsrwr_b( +// LA32-NEXT: entry: +// LA32-NEXT: [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32 +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]]) +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]]) +// LA32-NEXT: ret void +// +void iocsrwr_b(unsigned char a, unsigned int b) { + __iocsrwr_b(a, b); + __builtin_loongarch_iocsrwr_b(a, b); +} + +// LA32-LABEL: @iocsrwr_h( +// LA32-NEXT: entry: +// LA32-NEXT: [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32 +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]]) +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]]) +// LA32-NEXT: ret void +// +void iocsrwr_h(unsigned short a, unsigned int b) { + __iocsrwr_h(a, b); + __builtin_loongarch_iocsrwr_h(a, b); +} + +// LA32-LABEL: @iocsrwr_w( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]]) +// LA32-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]]) +// LA32-NEXT: ret void +// +void iocsrwr_w(unsigned int a, unsigned int b) { + __iocsrwr_w(a, b); + __builtin_loongarch_iocsrwr_w(a, b); +} + +// LA32-LABEL: @cpucfg( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A:%.*]]) +// LA32-NEXT: [[TMP1:%.*]] = 
tail call i32 @llvm.loongarch.cpucfg(i32 [[A]]) +// LA32-NEXT: ret i32 0 +// +unsigned int cpucfg(unsigned int a) { + unsigned int b = __cpucfg(a); + unsigned int c = __builtin_loongarch_cpucfg(a); + return 0; +} + +// LA32-LABEL: @rdtime( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +// LA32-NEXT: ret void +// +void rdtime() { + __rdtimeh_w(); + __rdtimel_w(); +} + +// LA32-LABEL: @loongarch_movfcsr2gr( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +// LA32-NEXT: ret i32 0 +// +int loongarch_movfcsr2gr() { + int a = __movfcsr2gr(1); + int b = __builtin_loongarch_movfcsr2gr(1); + return 0; +} + +// LA32-LABEL: @loongarch_movgr2fcsr( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]]) +// LA32-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]]) +// LA32-NEXT: ret void +// +void loongarch_movgr2fcsr(int a) { + __movgr2fcsr(1, a); + __builtin_loongarch_movgr2fcsr(1, a); +} + +// CHECK-LABEL: @cacop_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +// CHECK-NEXT: ret void +// +void cacop_w(unsigned long int a) { + __cacop_w(1, a, 1024); + __builtin_loongarch_cacop_w(1, a, 1024); +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c new file mode 100644 index 0000000000000000000000000000000000000000..60dfece74be6ee43b9572338f405a4c0253ca44e --- /dev/null +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c @@ -0,0 +1,47 @@ 
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -S -verify %s -o /dev/null + +#include <larchintrin.h> + +void csrrd_d(int a) { + __builtin_loongarch_csrrd_d(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_d(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrrd_d(a); // expected-error {{argument to '__builtin_loongarch_csrrd_d' must be a constant integer}} +} + +void csrwr_d(unsigned long int a) { + __builtin_loongarch_csrwr_d(a, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrwr_d(a, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrwr_d(a, a); // expected-error {{argument to '__builtin_loongarch_csrwr_d' must be a constant integer}} +} + +void csrxchg_d(unsigned long int a, unsigned long int b) { + __builtin_loongarch_csrxchg_d(a, b, 16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrxchg_d(a, b, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} + __builtin_loongarch_csrxchg_d(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_d' must be a constant integer}} +} + +void lddir_d(long int a, int b) { + __builtin_loongarch_lddir_d(a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_loongarch_lddir_d(a, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 31]}} + __builtin_loongarch_lddir_d(a, b); // expected-error {{argument to '__builtin_loongarch_lddir_d' must be a constant integer}} +} + +void ldpte_d(long int a, int b) { + __builtin_loongarch_ldpte_d(a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + 
__builtin_loongarch_ldpte_d(a, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 31]}} + __builtin_loongarch_ldpte_d(a, b); // expected-error {{argument to '__builtin_loongarch_ldpte_d' must be a constant integer}} +} + +int movfcsr2gr_out_of_lo_range(int a) { + int b = __builtin_loongarch_movfcsr2gr(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + int c = __builtin_loongarch_movfcsr2gr(32); // expected-error {{argument value 32 is outside the valid range [0, 3]}} + int d = __builtin_loongarch_movfcsr2gr(a); // expected-error {{argument to '__builtin_loongarch_movfcsr2gr' must be a constant integer}} + return 0; +} + +void movgr2fcsr(int a, int b) { + __builtin_loongarch_movgr2fcsr(-1, b); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_loongarch_movgr2fcsr(32, b); // expected-error {{argument value 32 is outside the valid range [0, 3]}} + __builtin_loongarch_movgr2fcsr(a, b); // expected-error {{argument to '__builtin_loongarch_movgr2fcsr' must be a constant integer}} +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c new file mode 100644 index 0000000000000000000000000000000000000000..a740882eef5411cbb1940ce1538cbea12a672b2e --- /dev/null +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c @@ -0,0 +1,429 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -O2 -emit-llvm %s -o - | FileCheck %s + +#include <larchintrin.h> + +// CHECK-LABEL: @dbar( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +// CHECK-NEXT: tail call void @llvm.loongarch.dbar(i32 0) +// CHECK-NEXT: ret void +// +void dbar() { + __dbar(0); + __builtin_loongarch_dbar(0); +} + +// CHECK-LABEL: @ibar( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.ibar(i32 0) +// CHECK-NEXT: tail call 
void @llvm.loongarch.ibar(i32 0) +// CHECK-NEXT: ret void +// +void ibar() { + __ibar(0); + __builtin_loongarch_ibar(0); +} + +// CHECK-LABEL: @loongarch_break( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.break(i32 1) +// CHECK-NEXT: tail call void @llvm.loongarch.break(i32 1) +// CHECK-NEXT: ret void +// +void loongarch_break() { + __break(1); + __builtin_loongarch_break(1); +} + +// CHECK-LABEL: @syscall( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +// CHECK-NEXT: tail call void @llvm.loongarch.syscall(i32 1) +// CHECK-NEXT: ret void +// +void syscall() { + __syscall(1); + __builtin_loongarch_syscall(1); +} + +// CHECK-LABEL: @csrrd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrrd.w(i32 1) +// CHECK-NEXT: ret i32 0 +// +unsigned int csrrd_w() { + unsigned int a = __csrrd_w(1); + unsigned int b = __builtin_loongarch_csrrd_w(1); + return 0; +} + +// CHECK-LABEL: @csrwr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrwr.w(i32 [[A]], i32 1) +// CHECK-NEXT: ret i32 0 +// +unsigned int csrwr_w(unsigned int a) { + unsigned int b = __csrwr_w(a, 1); + unsigned int c = __builtin_loongarch_csrwr_w(a, 1); + return 0; +} + +// CHECK-LABEL: @csrxchg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A:%.*]], i32 [[B:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.csrxchg.w(i32 [[A]], i32 [[B]], i32 1) +// CHECK-NEXT: ret i32 0 +// +unsigned int csrxchg_w(unsigned int a, unsigned int b) { + unsigned int c = __csrxchg_w(a, b, 1); + unsigned int d = __builtin_loongarch_csrxchg_w(a, b, 1); + return 0; +} + +// CHECK-LABEL: @crc_w_b_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = 
shl i32 [[A:%.*]], 24 +// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.b.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crc_w_b_w(int a, int b) { + int c = __crc_w_b_w(a, b); + int d = __builtin_loongarch_crc_w_b_w(a, b); + return 0; +} + +// CHECK-LABEL: @crc_w_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 16 +// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crc.w.h.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crc_w_h_w(int a, int b) { + int c = __crc_w_h_w(a, b); + int d = __builtin_loongarch_crc_w_h_w(a, b); + return 0; +} + +// CHECK-LABEL: @crc_w_w_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.w.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crc_w_w_w(int a, int b) { + int c = __crc_w_w_w(a, b); + int d = __builtin_loongarch_crc_w_w_w(a, b); + return 0; +} + +// CHECK-LABEL: @cacop_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A:%.*]], i64 1024) +// CHECK-NEXT: tail call void @llvm.loongarch.cacop.d(i64 1, i64 [[A]], i64 1024) +// CHECK-NEXT: ret void +// +void cacop_d(unsigned long int a) { + __cacop_d(1, a, 1024); + __builtin_loongarch_cacop_d(1, a, 1024); +} + +// CHECK-LABEL: @crc_w_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crc.w.d.w(i64 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int 
crc_w_d_w(long int a, int b) { + int c = __crc_w_d_w(a, b); + int d = __builtin_loongarch_crc_w_d_w(a, b); + return 0; +} + +// CHECK-LABEL: @crcc_w_b_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 24 +// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 24 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.b.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crcc_w_b_w(int a, int b) { + int c = __crcc_w_b_w(a, b); + int d = __builtin_loongarch_crcc_w_b_w(a, b); + return 0; +} + +// CHECK-LABEL: @crcc_w_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[A:%.*]], 16 +// CHECK-NEXT: [[CONV_I:%.*]] = ashr exact i32 [[TMP0]], 16 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.loongarch.crcc.w.h.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crcc_w_h_w(int a, int b) { + int c = __crcc_w_h_w(a, b); + int d = __builtin_loongarch_crcc_w_h_w(a, b); + return 0; +} + +// CHECK-LABEL: @crcc_w_w_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.w.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crcc_w_w_w(int a, int b) { + int c = __crcc_w_w_w(a, b); + int d = __builtin_loongarch_crcc_w_w_w(a, b); + return 0; +} + +// CHECK-LABEL: @crcc_w_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.crcc.w.d.w(i64 [[A]], i32 [[B]]) +// CHECK-NEXT: ret i32 0 +// +int crcc_w_d_w(long int a, int b) { + int c = __crcc_w_d_w(a, b); + int d = __builtin_loongarch_crcc_w_d_w(a, b); + return 0; +} + +// CHECK-LABEL: 
@csrrd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrrd.d(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrrd.d(i32 1) +// CHECK-NEXT: ret i64 0 +// +unsigned long int csrrd_d() { + unsigned long int a = __csrrd_d(1); + unsigned long int b = __builtin_loongarch_csrrd_d(1); + return 0; +} + +// CHECK-LABEL: @csrwr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrwr.d(i64 [[A]], i32 1) +// CHECK-NEXT: ret i64 0 +// +unsigned long int csrwr_d(unsigned long int a) { + unsigned long int b = __csrwr_d(a, 1); + unsigned long int c = __builtin_loongarch_csrwr_d(a, 1); + return 0; +} + +// CHECK-LABEL: @csrxchg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A:%.*]], i64 [[B:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.csrxchg.d(i64 [[A]], i64 [[B]], i32 1) +// CHECK-NEXT: ret i64 0 +// +unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { + unsigned long int c = __csrxchg_d(a, b, 1); + unsigned long int d = __builtin_loongarch_csrxchg_d(a, b, 1); + return 0; +} + +// CHECK-LABEL: @iocsrrd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.b(i32 [[A]]) +// CHECK-NEXT: ret i8 0 +// +unsigned char iocsrrd_b(unsigned int a) { + unsigned char b = __iocsrrd_b(a); + unsigned char c = __builtin_loongarch_iocsrrd_b(a); + return 0; +} + +// CHECK-LABEL: @iocsrrd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// CHECK-NEXT: ret i16 0 +// +unsigned short iocsrrd_h(unsigned int a) { + unsigned short b = 
__iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return 0; +} + +// CHECK-LABEL: @iocsrrd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.w(i32 [[A]]) +// CHECK-NEXT: ret i32 0 +// +unsigned int iocsrrd_w(unsigned int a) { + unsigned int b = __iocsrrd_w(a); + unsigned int c = __builtin_loongarch_iocsrrd_w(a); + return 0; +} + +// CHECK-LABEL: @iocsrwr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV_I:%.*]] = zext i8 [[A:%.*]] to i32 +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.b(i32 [[CONV_I]], i32 [[B]]) +// CHECK-NEXT: ret void +// +void iocsrwr_b(unsigned char a, unsigned int b) { + __iocsrwr_b(a, b); + __builtin_loongarch_iocsrwr_b(a, b); +} + +// CHECK-LABEL: @iocsrwr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV_I:%.*]] = zext i16 [[A:%.*]] to i32 +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.h(i32 [[CONV_I]], i32 [[B]]) +// CHECK-NEXT: ret void +// +void iocsrwr_h(unsigned short a, unsigned int b) { + __iocsrwr_h(a, b); + __builtin_loongarch_iocsrwr_h(a, b); +} + +// CHECK-LABEL: @iocsrwr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.w(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: ret void +// +void iocsrwr_w(unsigned int a, unsigned int b) { + __iocsrwr_w(a, b); + __builtin_loongarch_iocsrwr_w(a, b); +} + +// CHECK-LABEL: @iocsrrd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.iocsrrd.d(i32 [[A]]) +// CHECK-NEXT: ret i64 0 +// +unsigned long int iocsrrd_d(unsigned int a) { 
+ unsigned long int b = __iocsrrd_d(a); + unsigned long int c = __builtin_loongarch_iocsrrd_d(a); + return 0; +} + +// CHECK-LABEL: @iocsrwr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.d(i64 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.iocsrwr.d(i64 [[A]], i32 [[B]]) +// CHECK-NEXT: ret void +// +void iocsrwr_d(unsigned long int a, unsigned int b) { + __iocsrwr_d(a, b); + __builtin_loongarch_iocsrwr_d(a, b); +} + +// CHECK-LABEL: @asrtle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.asrtle.d(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.asrtle.d(i64 [[A]], i64 [[B]]) +// CHECK-NEXT: ret void +// +void asrtle_d(long int a, long int b) { + __asrtle_d(a, b); + __builtin_loongarch_asrtle_d(a, b); +} + +// CHECK-LABEL: @asrtgt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.asrtgt.d(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.asrtgt.d(i64 [[A]], i64 [[B]]) +// CHECK-NEXT: ret void +// +void asrtgt_d(long int a, long int b) { + __asrtgt_d(a, b); + __builtin_loongarch_asrtgt_d(a, b); +} + +// CHECK-LABEL: @lddir_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A:%.*]], i64 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lddir.d(i64 [[A]], i64 1) +// CHECK-NEXT: ret i64 0 +// +long int lddir_d(long int a) { + long int b = __lddir_d(a, 1); + long int c = __builtin_loongarch_lddir_d(a, 1); + return 0; +} + +// CHECK-LABEL: @ldpte_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.ldpte.d(i64 [[A:%.*]], i64 1) +// CHECK-NEXT: tail call void @llvm.loongarch.ldpte.d(i64 [[A]], i64 1) +// CHECK-NEXT: ret void +// +void ldpte_d(long int a) { + __ldpte_d(a, 1); + __builtin_loongarch_ldpte_d(a, 1); +} + +// CHECK-LABEL: @cpucfg( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.loongarch.cpucfg(i32 [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.cpucfg(i32 [[A]]) +// CHECK-NEXT: ret i32 0 +// +unsigned int cpucfg(unsigned int a) { + unsigned int b = __cpucfg(a); + unsigned int c = __builtin_loongarch_cpucfg(a); + return 0; +} + +// CHECK-LABEL: @rdtime_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: ret void +// +void rdtime_d() { + __rdtime_d(); +} + +// CHECK-LABEL: @rdtime( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 +// CHECK-NEXT: ret void +// +void rdtime() { + __rdtimeh_w(); + __rdtimel_w(); +} + +// CHECK-LABEL: @loongarch_movfcsr2gr( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.movfcsr2gr(i32 1) +// CHECK-NEXT: ret i32 0 +// +int loongarch_movfcsr2gr() { + int a = __movfcsr2gr(1); + int b = __builtin_loongarch_movfcsr2gr(1); + return 0; +} + +// CHECK-LABEL: @loongarch_movgr2fcsr( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A:%.*]]) +// CHECK-NEXT: tail call void @llvm.loongarch.movgr2fcsr(i32 1, i32 [[A]]) +// CHECK-NEXT: ret void +// +void loongarch_movgr2fcsr(int a) { + __movgr2fcsr(1, a); + __builtin_loongarch_movgr2fcsr(1, a); +} diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c index 451bec4c05aabe4d2bd970b4786fdb893936b569..e3c80d71c2bd768d77b12c9627ad0e3a6b957f6b 100644 --- a/clang/test/CodeGen/ext-int-cc.c +++ b/clang/test/CodeGen/ext-int-cc.c @@ -27,6 +27,8 @@ // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple 
arm64_32-apple-ios -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple arm64_32-apple-ios -target-abi darwinpcs -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=AARCH64DARWIN // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple arm -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ARM +// RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple loongarch64 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LA64 +// RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple loongarch32 -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LA32 // Make sure 128 and 64 bit versions are passed like integers. void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} @@ -57,6 +59,8 @@ void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} // AARCH64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) // AARCH64DARWIN: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) // ARM: define{{.*}} arm_aapcscc void @ParamPassing(i128* byval(i128) align 8 %{{.+}}, i64 %{{.+}}) +// LA64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) +// LA32: define{{.*}} void @ParamPassing(i128* %{{.+}}, i64 %{{.+}}) void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {} // LIN64: define{{.*}} void @ParamPassing2(i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) @@ -86,6 +90,8 @@ void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {} // AARCH64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}}) // AARCH64DARWIN: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}}) // ARM: define{{.*}} arm_aapcscc void @ParamPassing2(i127* byval(i127) align 8 %{{.+}}, i63 %{{.+}}) +// LA64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}}) +// LA32: define{{.*}} void @ParamPassing2(i127* %{{.+}}, i63 %{{.+}}) // Make sure we 
follow the signext rules for promotable integer types. void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {} @@ -116,6 +122,8 @@ void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {} // AARCH64: define{{.*}} void @ParamPassing3(i15 %{{.+}}, i31 %{{.+}}) // AARCH64DARWIN: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) // ARM: define{{.*}} arm_aapcscc void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) +// LA64: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) +// LA32: define{{.*}} void @ParamPassing3(i15 signext %{{.+}}, i31 signext %{{.+}}) #if __BITINT_MAXWIDTH__ > 128 // When supported, bit-precise types that are >128 are passed indirectly. Note, @@ -150,6 +158,8 @@ void ParamPassing4(_BitInt(129) a) {} // AARCH64-NOT: define{{.*}} void @ParamPassing4(i129* byval(i129) align 8 %{{.+}}) // AARCH64DARWIN-NOT: define{{.*}} void @ParamPassing4(i129* byval(i129) align 8 %{{.+}}) // ARM-NOT: define{{.*}} arm_aapcscc void @ParamPassing4(i129* byval(i129) align 8 %{{.+}}) +// LA64-NOT: define{{.*}} void @ParamPassing4(i129* %{{.+}}) +// LA32-NOT: define{{.*}} void @ParamPassing4(i129* %{{.+}}) #endif _BitInt(63) ReturnPassing(void){} @@ -180,6 +190,8 @@ _BitInt(63) ReturnPassing(void){} // AARCH64: define{{.*}} i63 @ReturnPassing( // AARCH64DARWIN: define{{.*}} i63 @ReturnPassing( // ARM: define{{.*}} arm_aapcscc i63 @ReturnPassing( +// LA64: define{{.*}} signext i63 @ReturnPassing( +// LA32: define{{.*}} i63 @ReturnPassing( _BitInt(64) ReturnPassing2(void){} // LIN64: define{{.*}} i64 @ReturnPassing2( @@ -209,6 +221,8 @@ _BitInt(64) ReturnPassing2(void){} // AARCH64: define{{.*}} i64 @ReturnPassing2( // AARCH64DARWIN: define{{.*}} i64 @ReturnPassing2( // ARM: define{{.*}} arm_aapcscc i64 @ReturnPassing2( +// LA64: define{{.*}} i64 @ReturnPassing2( +// LA32: define{{.*}} i64 @ReturnPassing2( _BitInt(127) ReturnPassing3(void){} // LIN64: define{{.*}} { i64, i64 } @ReturnPassing3( @@ -240,6 +254,8 @@ _BitInt(127) 
ReturnPassing3(void){} // AARCH64: define{{.*}} i127 @ReturnPassing3( // AARCH64DARWIN: define{{.*}} i127 @ReturnPassing3( // ARM: define{{.*}} arm_aapcscc void @ReturnPassing3(i127* noalias sret +// LA64: define{{.*}} i127 @ReturnPassing3( +// LA32: define{{.*}} void @ReturnPassing3(i127* noalias sret _BitInt(128) ReturnPassing4(void){} // LIN64: define{{.*}} { i64, i64 } @ReturnPassing4( @@ -269,6 +285,8 @@ _BitInt(128) ReturnPassing4(void){} // AARCH64: define{{.*}} i128 @ReturnPassing4( // AARCH64DARWIN: define{{.*}} i128 @ReturnPassing4( // ARM: define{{.*}} arm_aapcscc void @ReturnPassing4(i128* noalias sret +// LA64: define{{.*}} i128 @ReturnPassing4( +// LA32: define{{.*}} void @ReturnPassing4(i128* noalias sret #if __BITINT_MAXWIDTH__ > 128 _BitInt(129) ReturnPassing5(void){} @@ -299,6 +317,8 @@ _BitInt(129) ReturnPassing5(void){} // AARCH64-NOT: define{{.*}} void @ReturnPassing5(i129* noalias sret // AARCH64DARWIN-NOT: define{{.*}} void @ReturnPassing5(i129* noalias sret // ARM-NOT: define{{.*}} arm_aapcscc void @ReturnPassing5(i129* noalias sret +// LA64-NOT: define{{.*}} void @ReturnPassing5(i129* noalias sret +// LA32-NOT: define{{.*}} void @ReturnPassing5(i129* noalias sret // SparcV9 is odd in that it has a return-size limit of 256, not 128 or 64 // like other platforms, so test to make sure this behavior will still work. 
diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d8018564bd6c72d4b66129d82c9b6952b5eb23c --- /dev/null +++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include <stdint.h> + +/// Ensure that fields inherited from a parent struct are treated in the same +/// way as fields directly in the child for the purposes of LoongArch ABI rules. + +struct parent1_int32_s { + int32_t i1; +}; + +struct child1_int32_s : parent1_int32_s { + int32_t i2; +}; + +// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce) +struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) { + return a; +} + +struct parent2_int32_s { + int32_t i1; +}; + +struct child2_float_s : parent2_int32_s { + float f1; +}; + +// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1) +struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) { + return a; +} + +struct parent3_float_s { + float f1; +}; + +struct child3_int64_s : parent3_float_s { + int64_t i1; +}; + +// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1) +struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) { + return a; +} + +struct parent4_double_s { + double d1; +}; + +struct child4_double_s : parent4_double_s { + double d1; +}; + +// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1) +struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) { + return a; +} + +/// When virtual inheritance is used, the 
resulting struct isn't eligible for +/// passing in registers. + +struct parent5_virtual_s { + int32_t i1; +}; + +struct child5_virtual_s : virtual parent5_virtual_s { + float f1; +}; + +// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr noundef nonnull align 8 dereferenceable(12) %this, ptr noundef nonnull align 8 dereferenceable(12) %0) +struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) { + return a; +} + +/// Check for correct lowering in the presence of diamond inheritance. + +struct parent6_float_s { + float f1; +}; + +struct child6a_s : parent6_float_s { +}; + +struct child6b_s : parent6_float_s { +}; + +struct grandchild_6_s : child6a_s, child6b_s { +}; + +// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) +struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { + return a; +} diff --git a/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld new file mode 100644 index 0000000000000000000000000000000000000000..b23e55619b2ff03e2b4ec0a726d54266fdeeb69d --- /dev/null +++ b/clang/test/Driver/Inputs/multilib_loongarch_linux_sdk/loongarch64-unknown-linux-gnu/bin/ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/clang/test/Driver/fpatchable-function-entry.c b/clang/test/Driver/fpatchable-function-entry.c index da7370a4d87a7dfaf6e85504ea1340fa0c2981dd..4d0d609584c8de537bb6a48280ff48aeba3df039 100644 --- a/clang/test/Driver/fpatchable-function-entry.c +++ b/clang/test/Driver/fpatchable-function-entry.c @@ -2,6 +2,8 @@ // RUN: %clang -target x86_64 %s -fpatchable-function-entry=1 -c -### 2>&1 | FileCheck %s // RUN: %clang -target aarch64 %s -fpatchable-function-entry=1 -c -### 2>&1 | FileCheck %s // RUN: %clang -target aarch64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s 
+// RUN: %clang -target loongarch32 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s +// RUN: %clang -target loongarch64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang -target riscv32 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang -target riscv64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // CHECK: "-fpatchable-function-entry=1" diff --git a/clang/test/Driver/frame-pointer.c b/clang/test/Driver/frame-pointer.c index 47884e4e200368260044b0d402216af1aca96727..a007fda5aaf6e19736993d1b5fec0f34022ecbfd 100644 --- a/clang/test/Driver/frame-pointer.c +++ b/clang/test/Driver/frame-pointer.c @@ -57,6 +57,18 @@ // RUN: %clang --target=riscv64-unknown-linux-gnu -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s // RUN: %clang --target=riscv64-unknown-linux-gnu -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s +// RUN: %clang --target=loongarch32 -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-32 %s +// RUN: %clang --target=loongarch32 -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-32 %s +// RUN: %clang --target=loongarch32 -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-32 %s +// RUN: %clang --target=loongarch32 -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-32 %s +// RUN: %clang --target=loongarch32 -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-32 %s + +// RUN: %clang --target=loongarch64 -### -S -O0 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK0-64 %s +// RUN: %clang --target=loongarch64 -### -S -O1 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK1-64 %s +// RUN: %clang --target=loongarch64 -### -S -O2 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK2-64 %s +// RUN: %clang --target=loongarch64 -### -S -O3 %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECK3-64 %s +// RUN: %clang --target=loongarch64 -### -S -Os %s -o %t.s 2>&1 | FileCheck -check-prefix=CHECKs-64 %s + // CHECK0-32: -mframe-pointer=all // 
CHECK1-32-NOT: -mframe-pointer=all // CHECK2-32-NOT: -mframe-pointer=all diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index d7fbeaa8c6f1e451c70a96384d25718a46027692..5e8f4883a2a93c59904b2b4bd829d8209ed8b736 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -436,6 +436,12 @@ // CHECK-SANA-SANL-NO-SANA-RISCV64: "-fsanitize=leak" // RUN: %clang -target x86_64-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MSAN +// RUN: %clang -target loongarch64-unknown-linux-gnu -fsanitize=leak %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANL-LOONGARCH64 +// CHECK-SANL-LOONGARCH64: "-fsanitize=leak" + +// RUN: %clang -target loongarch64-unknown-linux-gnu -fsanitize=address,leak -fno-sanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANA-SANL-NO-SANA-LOONGARCH64 +// CHECK-SANA-SANL-NO-SANA-LOONGARCH64: "-fsanitize=leak" + // CHECK-MSAN: "-fno-assume-sane-operator-new" // RUN: %clang -target x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN // CHECK-ASAN: "-fno-assume-sane-operator-new" @@ -866,6 +872,7 @@ // RUN: %clang -target arm-linux-androideabi -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO // RUN: %clang -target x86_64-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO // RUN: %clang -target i386-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO +// RUN: %clang -target loongarch64-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO // RUN: %clang -target mips64-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO // RUN: %clang -target mips64el-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO // RUN: %clang -target mips-unknown-linux-gnu -fsanitize=scudo %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SCUDO diff --git 
a/clang/test/Driver/linux-header-search.cpp b/clang/test/Driver/linux-header-search.cpp index 91e4f8825e49a9dd00ed9a3262b2e523ba8d4f55..e8e84f1e4d39135e7280fab65360deb91310970f 100644 --- a/clang/test/Driver/linux-header-search.cpp +++ b/clang/test/Driver/linux-header-search.cpp @@ -244,6 +244,32 @@ // CHECK-GENTOO-4-9-X-32: "-internal-externc-isystem" "[[SYSROOT]]/include" // CHECK-GENTOO-4-9-X-32: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" // +// Check header search on Debian loong64 +// RUN: %clang -### %s -fsyntax-only 2>&1 \ +// RUN: --target=loongarch64-unknown-linux-gnu -stdlib=libstdc++ \ +// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +// RUN: --gcc-toolchain="" \ +// RUN: | FileCheck --check-prefix=CHECK-LOONG64-GNU %s +// +// Check that "-gnuf64" is seen as "-gnu" for loong64. +// RUN: %clang -### %s -fsyntax-only 2>&1 \ +// RUN: --target=loongarch64-unknown-linux-gnuf64 -stdlib=libstdc++ \ +// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +// RUN: --gcc-toolchain="" \ +// RUN: | FileCheck --check-prefix=CHECK-LOONG64-GNU %s +// CHECK-LOONG64-GNU: "-cc1" +// CHECK-LOONG64-GNU: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK-LOONG64-GNU: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13/loongarch64-linux-gnu" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../include/c++/13/backward" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[RESOURCE_DIR]]{{/|\\\\}}include" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/local/include" +// CHECK-LOONG64-GNU: "-internal-isystem" "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/../../../../loongarch64-linux-gnu/include" +// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/usr/include/loongarch64-linux-gnu" 
+// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/include" +// CHECK-LOONG64-GNU: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" +// // Check header search on Debian 6 / MIPS64 // RUN: %clang -### %s -fsyntax-only 2>&1 \ // RUN: --target=mips64-unknown-linux-gnuabi64 -stdlib=libstdc++ \ diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index e76b35d6137c7422829e8740feb240aa0563d78a..d5de5c66adc77fc79109a9992014a5840871fda5 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -830,6 +830,30 @@ // CHECK-ARM-HF: "-dynamic-linker" "{{.*}}/lib/ld-linux-armhf.so.3" // // RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnu \ +// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64D %s +// RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnuf64 \ +// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64D %s +// CHECK-LOONGARCH-LP64D: "{{.*}}ld{{(.exe)?}}" +// CHECK-LOONGARCH-LP64D: "-m" "elf64loongarch" +// CHECK-LOONGARCH-LP64D: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64d.so.1" +// +// RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnuf32 \ +// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64F %s +// CHECK-LOONGARCH-LP64F: "{{.*}}ld{{(.exe)?}}" +// CHECK-LOONGARCH-LP64F: "-m" "elf64loongarch" +// CHECK-LOONGARCH-LP64F: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64f.so.1" +// +// RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnusf \ +// RUN: | FileCheck --check-prefix=CHECK-LOONGARCH-LP64S %s +// CHECK-LOONGARCH-LP64S: "{{.*}}ld{{(.exe)?}}" +// CHECK-LOONGARCH-LP64S: "-m" "elf64loongarch" +// CHECK-LOONGARCH-LP64S: "-dynamic-linker" "{{.*}}/lib64/ld-linux-loongarch-lp64s.so.1" +// +// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=powerpc64-linux-gnu \ // RUN: | FileCheck --check-prefix=CHECK-PPC64 %s // CHECK-PPC64: "{{.*}}ld{{(.exe)?}}" @@ -1389,6 +1413,29 @@ // RUN: | FileCheck 
--check-prefix=CHECK-ANDROID-PTHREAD-LINK %s // CHECK-ANDROID-PTHREAD-LINK-NOT: argument unused during compilation: '-pthread' // +// Check linker invocation on a Debian LoongArch sysroot. +// RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnu -rtlib=platform \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-LOONG64 %s +// +// Check that "-gnuf64" is seen as "-gnu" for loong64. +// RUN: %clang -### %s -no-pie 2>&1 \ +// RUN: --target=loongarch64-linux-gnuf64 -rtlib=platform \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/debian_loong64_tree \ +// RUN: | FileCheck --check-prefix=CHECK-DEBIAN-ML-LOONG64 %s +// CHECK-DEBIAN-ML-LOONG64: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" +// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crt1.o" +// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crti.o" +// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/crtbegin.o" +// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13" +// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib/loongarch64-linux-gnu" +// CHECK-DEBIAN-ML-LOONG64: "-L[[SYSROOT]]/usr/lib" +// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/gcc/loongarch64-linux-gnu/13/crtend.o" +// CHECK-DEBIAN-ML-LOONG64: "[[SYSROOT]]/usr/lib/loongarch64-linux-gnu/crtn.o" +// // Check linker invocation on Debian 6 MIPS 32/64-bit. 
// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=mipsel-linux-gnu -rtlib=platform \ diff --git a/clang/test/Driver/loongarch-abi-error.c b/clang/test/Driver/loongarch-abi-error.c new file mode 100644 index 0000000000000000000000000000000000000000..2d4f4b925026bcdb82c6e13ce8f593e8f4aaafd0 --- /dev/null +++ b/clang/test/Driver/loongarch-abi-error.c @@ -0,0 +1,21 @@ +// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64S %s +// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64f 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64F %s +// RUN: not %clang --target=loongarch32-unknown-elf %s -fsyntax-only -mabi=lp64d 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA32-LP64D %s + +// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32S %s +// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32f 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32F %s +// RUN: not %clang --target=loongarch64-unknown-elf %s -fsyntax-only -mabi=ilp32d 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-LA64-ILP32D %s + +// CHECK-LA32-LP64S: error: unknown target ABI 'lp64s' +// CHECK-LA32-LP64F: error: unknown target ABI 'lp64f' +// CHECK-LA32-LP64D: error: unknown target ABI 'lp64d' + +// CHECK-LA64-ILP32S: error: unknown target ABI 'ilp32s' +// CHECK-LA64-ILP32F: error: unknown target ABI 'ilp32f' +// CHECK-LA64-ILP32D: error: unknown target ABI 'ilp32d' diff --git a/clang/test/Driver/loongarch-abi.c b/clang/test/Driver/loongarch-abi.c new file mode 100644 index 0000000000000000000000000000000000000000..12a81d66455849392d00d39acab26828ee7239ba --- /dev/null +++ b/clang/test/Driver/loongarch-abi.c @@ -0,0 +1,53 @@ +// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32D %s +// RUN: %clang 
--target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32S %s +// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32f 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32F %s +// RUN: %clang --target=loongarch32-unknown-elf %s -fsyntax-only -### -mabi=ilp32d 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32D %s + +// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64D %s +// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64S %s +// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64f 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64F %s +// RUN: %clang --target=loongarch64-unknown-elf %s -fsyntax-only -### -mabi=lp64d 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64D %s + +// RUN: %clang --target=loongarch32-linux-gnusf %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32S %s +// RUN: %clang --target=loongarch32-linux-gnuf32 %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32F %s +// RUN: %clang --target=loongarch32-linux-gnuf64 %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32D %s +// RUN: %clang --target=loongarch32-linux-gnu %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32D %s + +// RUN: %clang --target=loongarch64-linux-gnusf %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64S %s +// RUN: %clang --target=loongarch64-linux-gnuf32 %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64F %s +// RUN: %clang --target=loongarch64-linux-gnuf64 %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64D %s +// RUN: %clang --target=loongarch64-linux-gnu %s -fsyntax-only -### 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64D %s + +// Check that -mabi prevails in case of conflicts with the triple-implied ABI. 
+// RUN: %clang --target=loongarch32-linux-gnuf64 %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32S %s +// RUN: %clang --target=loongarch64-linux-gnuf64 %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64S %s +// RUN: %clang --target=loongarch32-linux-gnu %s -fsyntax-only -### -mabi=ilp32s 2>&1 \ +// RUN: | FileCheck --check-prefix=ILP32S %s +// RUN: %clang --target=loongarch64-linux-gnu %s -fsyntax-only -### -mabi=lp64s 2>&1 \ +// RUN: | FileCheck --check-prefix=LP64S %s + +// ILP32S: "-target-abi" "ilp32s" +// ILP32F: "-target-abi" "ilp32f" +// ILP32D: "-target-abi" "ilp32d" + +// LP64S: "-target-abi" "lp64s" +// LP64F: "-target-abi" "lp64f" +// LP64D: "-target-abi" "lp64d" diff --git a/clang/test/Driver/loongarch-as.s b/clang/test/Driver/loongarch-as.s new file mode 100644 index 0000000000000000000000000000000000000000..6f6d87f6ac65cdf0dba8c672a33573cd97ca9b8f --- /dev/null +++ b/clang/test/Driver/loongarch-as.s @@ -0,0 +1,15 @@ +/// This file checks options are correctly passed to as for LoongArch targets. + +/// Check `-mabi`. 
+// RUN: %clang --target=loongarch64 -### -fno-integrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64d -### -fno-integrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64f -### -fno-integrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64f --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64s -### -fno-integrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64s --check-prefixes=ALL,ABI %s + +// ALL: as + +// ABI: "-mabi=[[ABI]]" diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c new file mode 100644 index 0000000000000000000000000000000000000000..3cdf3ba3d23e14407edae2dc21f35bd1599a4fb7 --- /dev/null +++ b/clang/test/Driver/loongarch-default-features.c @@ -0,0 +1,9 @@ +// RUN: %clang --target=loongarch32 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA32 +// RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64 + +// LA32: "target-features"="+32bit" +// LA64: "target-features"="+64bit,+d,+f,+ual" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-ias.s b/clang/test/Driver/loongarch-ias.s new file mode 100644 index 0000000000000000000000000000000000000000..6fec9e6e159195b52231152a41dcc4f396249fdf --- /dev/null +++ b/clang/test/Driver/loongarch-ias.s @@ -0,0 +1,23 @@ +/// This file checks options are correctly passed to cc1as for LoongArch targets. + +/// Check `-target-abi`. 
+// RUN: %clang --target=loongarch32 -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=ilp32d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch32 -mabi=ilp32d -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=ilp32d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch32 -mabi=ilp32f -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=ilp32f --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch32 -mabi=ilp32s -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=ilp32s --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64d -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64d --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64f -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64f --check-prefixes=ALL,ABI %s +// RUN: %clang --target=loongarch64 -mabi=lp64s -### -fintegrated-as -c %s 2>&1 | \ +// RUN: FileCheck -DABI=lp64s --check-prefixes=ALL,ABI %s + +// ALL: -cc1as + +// ABI: "-target-abi" "[[ABI]]" diff --git a/clang/test/Driver/loongarch-march-error.c b/clang/test/Driver/loongarch-march-error.c new file mode 100644 index 0000000000000000000000000000000000000000..6bfaa18b05225902791b3ccc84a1488208874f30 --- /dev/null +++ b/clang/test/Driver/loongarch-march-error.c @@ -0,0 +1,7 @@ +// RUN: not %clang --target=loongarch64 -march=loongarch -fsyntax-only %s 2>&1 | \ +// RUN: FileCheck -DCPU=loongarch %s +// RUN: not %clang --target=loongarch64 -march=LA464 -fsyntax-only %s 2>&1 | \ +// RUN: FileCheck -DCPU=LA464 %s + +// CHECK: error: unknown target CPU '[[CPU]]' +// CHECK-NEXT: note: valid target CPU values are: {{.*}} diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c new file mode 100644 index 0000000000000000000000000000000000000000..9214130cd034fd58164b4860c5199d198f21f577 --- /dev/null +++ 
b/clang/test/Driver/loongarch-march.c @@ -0,0 +1,27 @@ +// RUN: %clang --target=loongarch64 -march=loongarch64 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LOONGARCH64 +// RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LA464 +// RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 +// RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LA464 + +// CC1-LOONGARCH64: "-target-cpu" "loongarch64" +// CC1-LOONGARCH64-NOT: "-target-feature" +// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+ual" +// CC1-LOONGARCH64-NOT: "-target-feature" +// CC1-LOONGARCH64: "-target-abi" "lp64d" + +// CC1-LA464: "-target-cpu" "la464" +// CC1-LA464-NOT: "-target-feature" +// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" +// CC1-LA464-NOT: "-target-feature" +// CC1-LA464: "-target-abi" "lp64d" + +// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" +// IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-mdouble-float.c b/clang/test/Driver/loongarch-mdouble-float.c new file mode 100644 index 0000000000000000000000000000000000000000..caa9ab42cbcfc77dd9d3ed26da19986d13f7ca5c --- /dev/null +++ b/clang/test/Driver/loongarch-mdouble-float.c @@ -0,0 +1,24 @@ +// RUN: %clang --target=loongarch64 -mdouble-float -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1 +// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=64 -mabi=lp64d -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck 
%s --check-prefixes=CC1,NOWARN +// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=0 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,WARN,WARN-FPU0 +// RUN: %clang --target=loongarch64 -mdouble-float -mfpu=none -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,WARN,WARN-FPUNONE +// RUN: %clang --target=loongarch64 -mdouble-float -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR + +// NOWARN-NOT: warning: +// WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-mdouble-float' (lp64d) +// WARN-FPU0: warning: ignoring '-mfpu=0' as it conflicts with that implied by '-mdouble-float' (64) +// WARN-FPUNONE: warning: ignoring '-mfpu=none' as it conflicts with that implied by '-mdouble-float' (64) + +// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "+d" +// CC1: "-target-abi" "lp64d" + +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-mfpu-error.c b/clang/test/Driver/loongarch-mfpu-error.c new file mode 100644 index 0000000000000000000000000000000000000000..1dbc556ddbcb45bfbd2be64040a43dbe5018369c --- /dev/null +++ b/clang/test/Driver/loongarch-mfpu-error.c @@ -0,0 +1,3 @@ +// RUN: %clang --target=loongarch64 -mfpu=xxx %s -### 2>&1 | FileCheck %s + +// CHECK: invalid argument 'xxx' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none) diff --git a/clang/test/Driver/loongarch-mfpu.c b/clang/test/Driver/loongarch-mfpu.c new file mode 100644 index 0000000000000000000000000000000000000000..753057d70ceea32b37297b207b87e96e0c7877d8 --- /dev/null +++ b/clang/test/Driver/loongarch-mfpu.c @@ -0,0 +1,34 @@ +// RUN: %clang --target=loongarch64 -mfpu=64 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-FPU64 +// RUN: %clang --target=loongarch64 -mfpu=32 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s 
--check-prefix=CC1-FPU32 +// RUN: %clang --target=loongarch64 -mfpu=0 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-FPU0 +// RUN: %clang --target=loongarch64 -mfpu=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-FPU0 + +// RUN: %clang --target=loongarch64 -mfpu=64 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-FPU64 +// RUN: %clang --target=loongarch64 -mfpu=32 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-FPU32 +// RUN: %clang --target=loongarch64 -mfpu=0 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-FPU0 +// RUN: %clang --target=loongarch64 -mfpu=none -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-FPU0 + +// CC1-FPU64: "-target-feature" "+f"{{.*}} "-target-feature" "+d" +// CC1-FPU64: "-target-abi" "lp64d" + +// CC1-FPU32: "-target-feature" "+f"{{.*}} "-target-feature" "-d" +// CC1-FPU32: "-target-abi" "lp64f" + +// CC1-FPU0: "-target-feature" "-f"{{.*}} "-target-feature" "-d" +// CC1-FPU0: "-target-abi" "lp64s" + +// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" +// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d{{(,.*)?}}" +// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c new file mode 100644 index 0000000000000000000000000000000000000000..bd9b3e8a8c019d02ae154f5791192061909bd389 --- /dev/null +++ b/clang/test/Driver/loongarch-msingle-float.c @@ -0,0 +1,21 @@ +// RUN: %clang --target=loongarch64 -msingle-float -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1 +// RUN: %clang --target=loongarch64 -msingle-float -mfpu=32 -mabi=lp64f -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,NOWARN +// RUN: %clang --target=loongarch64 
-msingle-float -mfpu=64 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,WARN +// RUN: %clang --target=loongarch64 -msingle-float -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR + +// NOWARN-NOT: warning: +// WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-msingle-float' (lp64f) +// WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msingle-float' (32) + +// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" +// CC1: "-target-abi" "lp64f" + +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c new file mode 100644 index 0000000000000000000000000000000000000000..0e5121ac84b4c1ca51eee930b8fea47a28f4ccdb --- /dev/null +++ b/clang/test/Driver/loongarch-msoft-float.c @@ -0,0 +1,21 @@ +// RUN: %clang --target=loongarch64 -msoft-float -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1 +// RUN: %clang --target=loongarch64 -msoft-float -mfpu=0 -mabi=lp64s -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,NOWARN +// RUN: %clang --target=loongarch64 -msoft-float -mfpu=64 -mabi=lp64d -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CC1,WARN +// RUN: %clang --target=loongarch64 -msoft-float -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR + +// NOWARN-NOT: warning: +// WARN: warning: ignoring '-mabi=lp64d' as it conflicts with that implied by '-msoft-float' (lp64s) +// WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msoft-float' (0) + +// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" +// CC1: "-target-abi" "lp64s" + +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-mtune.c 
b/clang/test/Driver/loongarch-mtune.c new file mode 100644 index 0000000000000000000000000000000000000000..6f3f39e9bbd86a351d161bc2bc213783a612829f --- /dev/null +++ b/clang/test/Driver/loongarch-mtune.c @@ -0,0 +1,34 @@ +// RUN: %clang --target=loongarch64 -mtune=loongarch64 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=loongarch64 +// RUN: %clang --target=loongarch64 -mtune=loongarch64 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=loongarch64 + +// RUN: %clang --target=loongarch64 -mtune=la464 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=la464 +// RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464 + +// RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=invalidcpu +// RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR -DCPU=invalidcpu + +// RUN: %clang --target=loongarch64 -mtune=generic -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=generic +// RUN: not %clang --target=loongarch64 -mtune=generic -S -emit-llvm %s -o /dev/null 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR -DCPU=generic + +// RUN: %clang --target=loongarch64 -mtune=generic-la64 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=generic-la64 +// RUN: not %clang --target=loongarch64 -mtune=generic-la64 -S -emit-llvm %s -o /dev/null 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR -DCPU=generic-la64 + +// CC1ARG: "-tune-cpu" "[[CPU]]" +// IRATTR: "tune-cpu"="[[CPU]]" + +// ERROR: error: unknown target CPU '[[CPU]]' +// ERROR-NEXT: note: valid target CPU values are: {{.*}} + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-munaligned-access.c 
b/clang/test/Driver/loongarch-munaligned-access.c new file mode 100644 index 0000000000000000000000000000000000000000..44edb2eb17e6ab603136b1e0db795562194e0693 --- /dev/null +++ b/clang/test/Driver/loongarch-munaligned-access.c @@ -0,0 +1,61 @@ +/// Test -m[no-]unaligned-access and -m[no-]strict-align options. + +// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang 
--target=loongarch64 -mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED + +// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED + +// CC1-UNALIGNED: "-target-feature" "+ual" +// CC1-NO-UNALIGNED: "-target-feature" "-ual" + +// 
IR-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}" +// IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/loongarch-toolchain.c b/clang/test/Driver/loongarch-toolchain.c new file mode 100644 index 0000000000000000000000000000000000000000..2e6d360acf29df52cb5b433bfb9fed8082133f4f --- /dev/null +++ b/clang/test/Driver/loongarch-toolchain.c @@ -0,0 +1,27 @@ +// UNSUPPORTED: system-windows +/// A basic clang -cc1 command-line, and simple environment check. + +// RUN: %clang %s -### --target=loongarch32 2>&1 | FileCheck --check-prefix=CC1 %s -DTRIPLE=loongarch32 +// RUN: %clang %s -### --target=loongarch64 2>&1 | FileCheck --check-prefix=CC1 %s -DTRIPLE=loongarch64 + +// CC1: "-cc1" "-triple" "[[TRIPLE]]" + +/// In the below tests, --rtlib=platform is used so that the driver ignores +/// the configure-time CLANG_DEFAULT_RTLIB option when choosing the runtime lib. 
+ +// RUN: env "PATH=" %clang -### %s -fuse-ld=ld -no-pie -mabi=lp64d \ +// RUN: --target=loongarch64-unknown-linux-gnu --rtlib=platform \ +// RUN: --gcc-toolchain=%S/Inputs/multilib_loongarch_linux_sdk \ +// RUN: --sysroot=%S/Inputs/multilib_loongarch_linux_sdk/sysroot 2>&1 \ +// RUN: | FileCheck --check-prefix=LA64 %s + +// LA64: "{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/bin/ld" +// LA64-SAME: {{^}} "--sysroot={{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot" +// LA64-SAME: "-m" "elf64loongarch" +// LA64-SAME: "-dynamic-linker" "/lib64/ld-linux-loongarch-lp64d.so.1" +// LA64-SAME: "{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/crtbegin.o" +// LA64-SAME: "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0" +// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/lib/../lib64" +// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib/../lib64" +// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/lib/gcc/loongarch64-unknown-linux-gnu/12.1.0/../../../../loongarch64-unknown-linux-gnu/lib" +// LA64-SAME: {{^}} "-L{{.*}}/Inputs/multilib_loongarch_linux_sdk/sysroot/usr/lib" diff --git a/clang/test/Driver/munaligned-access-unused.c b/clang/test/Driver/munaligned-access-unused.c new file mode 100644 index 0000000000000000000000000000000000000000..c8cbe4e0df4ada814020791c1cb8279a804c8bf9 --- /dev/null +++ b/clang/test/Driver/munaligned-access-unused.c @@ -0,0 +1,8 @@ +/// Check -m[no-]unaligned-access and -m[no-]strict-align are warned unused on a target that does not support them. 
+ +// RUN: %clang --target=x86_64 -munaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=unaligned-access +// RUN: %clang --target=x86_64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-unaligned-access +// RUN: %clang --target=x86_64 -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=strict-align +// RUN: %clang --target=x86_64 -mno-strict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-strict-align + +// CHECK: warning: argument unused during compilation: '-m[[OPTION]]' [-Wunused-command-line-argument] diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c new file mode 100644 index 0000000000000000000000000000000000000000..4ef42a921ec0339cf465f7e5a5b057f898b285e7 --- /dev/null +++ b/clang/test/Preprocessor/init-loongarch.c @@ -0,0 +1,809 @@ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32 /dev/null \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32-unknown-linux /dev/null \ +// RUN: | FileCheck --match-full-lines --check-prefixes=LA32,LA32-LINUX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch32 \ +// RUN: -fforce-enable-int128 /dev/null | FileCheck --match-full-lines \ +// RUN: --check-prefixes=LA32,LA32-INT128 %s + +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64 /dev/null \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64-unknown-linux /dev/null \ +// RUN: | FileCheck --match-full-lines --check-prefixes=LA64,LA64-LINUX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple loongarch64 \ +// RUN: -fforce-enable-int128 /dev/null | FileCheck --match-full-lines \ +// RUN: --check-prefixes=LA64,LA64-INT128 %s + +//// Note that common macros are 
tested in init.c, such as __VERSION__. So they're not listed here. + +// LA32: #define _ILP32 1 +// LA32: #define __ATOMIC_ACQUIRE 2 +// LA32-NEXT: #define __ATOMIC_ACQ_REL 4 +// LA32-NEXT: #define __ATOMIC_CONSUME 1 +// LA32-NEXT: #define __ATOMIC_RELAXED 0 +// LA32-NEXT: #define __ATOMIC_RELEASE 3 +// LA32-NEXT: #define __ATOMIC_SEQ_CST 5 +// LA32: #define __BIGGEST_ALIGNMENT__ 16 +// LA32: #define __BITINT_MAXWIDTH__ 128 +// LA32: #define __BOOL_WIDTH__ 8 +// LA32: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +// LA32: #define __CHAR16_TYPE__ unsigned short +// LA32: #define __CHAR32_TYPE__ unsigned int +// LA32: #define __CHAR_BIT__ 8 +// LA32: #define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_INT_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_LLONG_LOCK_FREE 1 +// LA32: #define __CLANG_ATOMIC_LONG_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 +// LA32: #define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 +// LA32: #define __DBL_DECIMAL_DIG__ 17 +// LA32: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324 +// LA32: #define __DBL_DIG__ 15 +// LA32: #define __DBL_EPSILON__ 2.2204460492503131e-16 +// LA32: #define __DBL_HAS_DENORM__ 1 +// LA32: #define __DBL_HAS_INFINITY__ 1 +// LA32: #define __DBL_HAS_QUIET_NAN__ 1 +// LA32: #define __DBL_MANT_DIG__ 53 +// LA32: #define __DBL_MAX_10_EXP__ 308 +// LA32: #define __DBL_MAX_EXP__ 1024 +// LA32: #define __DBL_MAX__ 1.7976931348623157e+308 +// LA32: #define __DBL_MIN_10_EXP__ (-307) +// LA32: #define __DBL_MIN_EXP__ (-1021) +// LA32: #define __DBL_MIN__ 2.2250738585072014e-308 +// LA32: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ +// LA32: #define __FLT_DECIMAL_DIG__ 9 +// LA32: #define __FLT_DENORM_MIN__ 1.40129846e-45F +// LA32: #define __FLT_DIG__ 6 +// LA32: #define 
__FLT_EPSILON__ 1.19209290e-7F +// LA32: #define __FLT_HAS_DENORM__ 1 +// LA32: #define __FLT_HAS_INFINITY__ 1 +// LA32: #define __FLT_HAS_QUIET_NAN__ 1 +// LA32: #define __FLT_MANT_DIG__ 24 +// LA32: #define __FLT_MAX_10_EXP__ 38 +// LA32: #define __FLT_MAX_EXP__ 128 +// LA32: #define __FLT_MAX__ 3.40282347e+38F +// LA32: #define __FLT_MIN_10_EXP__ (-37) +// LA32: #define __FLT_MIN_EXP__ (-125) +// LA32: #define __FLT_MIN__ 1.17549435e-38F +// LA32: #define __FLT_RADIX__ 2 +// LA32: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_INT_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_LLONG_LOCK_FREE 1 +// LA32: #define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// LA32: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// LA32: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// LA32: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// LA32: #define __ILP32__ 1 +// LA32: #define __INT16_C_SUFFIX__ +// LA32: #define __INT16_FMTd__ "hd" +// LA32: #define __INT16_FMTi__ "hi" +// LA32: #define __INT16_MAX__ 32767 +// LA32: #define __INT16_TYPE__ short +// LA32: #define __INT32_C_SUFFIX__ +// LA32: #define __INT32_FMTd__ "d" +// LA32: #define __INT32_FMTi__ "i" +// LA32: #define __INT32_MAX__ 2147483647 +// LA32: #define __INT32_TYPE__ int +// LA32: #define __INT64_C_SUFFIX__ LL +// LA32: #define __INT64_FMTd__ "lld" +// LA32: #define __INT64_FMTi__ "lli" +// LA32: #define __INT64_MAX__ 9223372036854775807LL +// LA32: #define __INT64_TYPE__ long long int +// LA32: #define __INT8_C_SUFFIX__ +// LA32: #define __INT8_FMTd__ "hhd" +// LA32: #define __INT8_FMTi__ "hhi" +// LA32: #define __INT8_MAX__ 127 +// LA32: #define 
__INT8_TYPE__ signed char +// LA32: #define __INTMAX_C_SUFFIX__ LL +// LA32: #define __INTMAX_FMTd__ "lld" +// LA32: #define __INTMAX_FMTi__ "lli" +// LA32: #define __INTMAX_MAX__ 9223372036854775807LL +// LA32: #define __INTMAX_TYPE__ long long int +// LA32: #define __INTMAX_WIDTH__ 64 +// LA32: #define __INTPTR_FMTd__ "d" +// LA32: #define __INTPTR_FMTi__ "i" +// LA32: #define __INTPTR_MAX__ 2147483647 +// LA32: #define __INTPTR_TYPE__ int +// LA32: #define __INTPTR_WIDTH__ 32 +// LA32: #define __INT_FAST16_FMTd__ "hd" +// LA32: #define __INT_FAST16_FMTi__ "hi" +// LA32: #define __INT_FAST16_MAX__ 32767 +// LA32: #define __INT_FAST16_TYPE__ short +// LA32: #define __INT_FAST16_WIDTH__ 16 +// LA32: #define __INT_FAST32_FMTd__ "d" +// LA32: #define __INT_FAST32_FMTi__ "i" +// LA32: #define __INT_FAST32_MAX__ 2147483647 +// LA32: #define __INT_FAST32_TYPE__ int +// LA32: #define __INT_FAST32_WIDTH__ 32 +// LA32: #define __INT_FAST64_FMTd__ "lld" +// LA32: #define __INT_FAST64_FMTi__ "lli" +// LA32: #define __INT_FAST64_MAX__ 9223372036854775807LL +// LA32: #define __INT_FAST64_TYPE__ long long int +// LA32: #define __INT_FAST64_WIDTH__ 64 +// LA32: #define __INT_FAST8_FMTd__ "hhd" +// LA32: #define __INT_FAST8_FMTi__ "hhi" +// LA32: #define __INT_FAST8_MAX__ 127 +// LA32: #define __INT_FAST8_TYPE__ signed char +// LA32: #define __INT_FAST8_WIDTH__ 8 +// LA32: #define __INT_LEAST16_FMTd__ "hd" +// LA32: #define __INT_LEAST16_FMTi__ "hi" +// LA32: #define __INT_LEAST16_MAX__ 32767 +// LA32: #define __INT_LEAST16_TYPE__ short +// LA32: #define __INT_LEAST16_WIDTH__ 16 +// LA32: #define __INT_LEAST32_FMTd__ "d" +// LA32: #define __INT_LEAST32_FMTi__ "i" +// LA32: #define __INT_LEAST32_MAX__ 2147483647 +// LA32: #define __INT_LEAST32_TYPE__ int +// LA32: #define __INT_LEAST32_WIDTH__ 32 +// LA32: #define __INT_LEAST64_FMTd__ "lld" +// LA32: #define __INT_LEAST64_FMTi__ "lli" +// LA32: #define __INT_LEAST64_MAX__ 9223372036854775807LL +// LA32: #define 
__INT_LEAST64_TYPE__ long long int +// LA32: #define __INT_LEAST64_WIDTH__ 64 +// LA32: #define __INT_LEAST8_FMTd__ "hhd" +// LA32: #define __INT_LEAST8_FMTi__ "hhi" +// LA32: #define __INT_LEAST8_MAX__ 127 +// LA32: #define __INT_LEAST8_TYPE__ signed char +// LA32: #define __INT_LEAST8_WIDTH__ 8 +// LA32: #define __INT_MAX__ 2147483647 +// LA32: #define __INT_WIDTH__ 32 +// LA32: #define __LDBL_DECIMAL_DIG__ 36 +// LA32: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L +// LA32: #define __LDBL_DIG__ 33 +// LA32: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L +// LA32: #define __LDBL_HAS_DENORM__ 1 +// LA32: #define __LDBL_HAS_INFINITY__ 1 +// LA32: #define __LDBL_HAS_QUIET_NAN__ 1 +// LA32: #define __LDBL_MANT_DIG__ 113 +// LA32: #define __LDBL_MAX_10_EXP__ 4932 +// LA32: #define __LDBL_MAX_EXP__ 16384 +// LA32: #define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L +// LA32: #define __LDBL_MIN_10_EXP__ (-4931) +// LA32: #define __LDBL_MIN_EXP__ (-16381) +// LA32: #define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L +// LA32: #define __LITTLE_ENDIAN__ 1 +// LA32: #define __LLONG_WIDTH__ 64 +// LA32: #define __LONG_LONG_MAX__ 9223372036854775807LL +// LA32: #define __LONG_MAX__ 2147483647L +// LA32: #define __LONG_WIDTH__ 32 +// LA32: #define __NO_INLINE__ 1 +// LA32: #define __NO_MATH_ERRNO__ 1 +// LA32: #define __OBJC_BOOL_IS_BOOL 0 +// LA32: #define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 +// LA32: #define __OPENCL_MEMORY_SCOPE_DEVICE 2 +// LA32: #define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +// LA32: #define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 +// LA32: #define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 +// LA32: #define __POINTER_WIDTH__ 32 +// LA32: #define __PRAGMA_REDEFINE_EXTNAME 1 +// LA32: #define __PTRDIFF_FMTd__ "d" +// LA32: #define __PTRDIFF_FMTi__ "i" +// LA32: #define __PTRDIFF_MAX__ 2147483647 +// LA32: #define __PTRDIFF_TYPE__ int +// LA32: #define __PTRDIFF_WIDTH__ 32 +// LA32: #define 
__SCHAR_MAX__ 127 +// LA32: #define __SHRT_MAX__ 32767 +// LA32: #define __SHRT_WIDTH__ 16 +// LA32: #define __SIG_ATOMIC_MAX__ 2147483647 +// LA32: #define __SIG_ATOMIC_WIDTH__ 32 +// LA32: #define __SIZEOF_DOUBLE__ 8 +// LA32: #define __SIZEOF_FLOAT__ 4 +// LA32-INT128: #define __SIZEOF_INT128__ 16 +// LA32: #define __SIZEOF_INT__ 4 +// LA32: #define __SIZEOF_LONG_DOUBLE__ 16 +// LA32: #define __SIZEOF_LONG_LONG__ 8 +// LA32: #define __SIZEOF_LONG__ 4 +// LA32: #define __SIZEOF_POINTER__ 4 +// LA32: #define __SIZEOF_PTRDIFF_T__ 4 +// LA32: #define __SIZEOF_SHORT__ 2 +// LA32: #define __SIZEOF_SIZE_T__ 4 +// LA32: #define __SIZEOF_WCHAR_T__ 4 +// LA32: #define __SIZEOF_WINT_T__ 4 +// LA32: #define __SIZE_FMTX__ "X" +// LA32: #define __SIZE_FMTo__ "o" +// LA32: #define __SIZE_FMTu__ "u" +// LA32: #define __SIZE_FMTx__ "x" +// LA32: #define __SIZE_MAX__ 4294967295U +// LA32: #define __SIZE_TYPE__ unsigned int +// LA32: #define __SIZE_WIDTH__ 32 +// LA32: #define __STDC_HOSTED__ 0 +// LA32: #define __STDC_UTF_16__ 1 +// LA32: #define __STDC_UTF_32__ 1 +// LA32: #define __STDC_VERSION__ 201710L +// LA32: #define __STDC__ 1 +// LA32: #define __UINT16_C_SUFFIX__ +// LA32: #define __UINT16_FMTX__ "hX" +// LA32: #define __UINT16_FMTo__ "ho" +// LA32: #define __UINT16_FMTu__ "hu" +// LA32: #define __UINT16_FMTx__ "hx" +// LA32: #define __UINT16_MAX__ 65535 +// LA32: #define __UINT16_TYPE__ unsigned short +// LA32: #define __UINT32_C_SUFFIX__ U +// LA32: #define __UINT32_FMTX__ "X" +// LA32: #define __UINT32_FMTo__ "o" +// LA32: #define __UINT32_FMTu__ "u" +// LA32: #define __UINT32_FMTx__ "x" +// LA32: #define __UINT32_MAX__ 4294967295U +// LA32: #define __UINT32_TYPE__ unsigned int +// LA32: #define __UINT64_C_SUFFIX__ ULL +// LA32: #define __UINT64_FMTX__ "llX" +// LA32: #define __UINT64_FMTo__ "llo" +// LA32: #define __UINT64_FMTu__ "llu" +// LA32: #define __UINT64_FMTx__ "llx" +// LA32: #define __UINT64_MAX__ 18446744073709551615ULL +// LA32: #define __UINT64_TYPE__ 
long long unsigned int +// LA32: #define __UINT8_C_SUFFIX__ +// LA32: #define __UINT8_FMTX__ "hhX" +// LA32: #define __UINT8_FMTo__ "hho" +// LA32: #define __UINT8_FMTu__ "hhu" +// LA32: #define __UINT8_FMTx__ "hhx" +// LA32: #define __UINT8_MAX__ 255 +// LA32: #define __UINT8_TYPE__ unsigned char +// LA32: #define __UINTMAX_C_SUFFIX__ ULL +// LA32: #define __UINTMAX_FMTX__ "llX" +// LA32: #define __UINTMAX_FMTo__ "llo" +// LA32: #define __UINTMAX_FMTu__ "llu" +// LA32: #define __UINTMAX_FMTx__ "llx" +// LA32: #define __UINTMAX_MAX__ 18446744073709551615ULL +// LA32: #define __UINTMAX_TYPE__ long long unsigned int +// LA32: #define __UINTMAX_WIDTH__ 64 +// LA32: #define __UINTPTR_FMTX__ "X" +// LA32: #define __UINTPTR_FMTo__ "o" +// LA32: #define __UINTPTR_FMTu__ "u" +// LA32: #define __UINTPTR_FMTx__ "x" +// LA32: #define __UINTPTR_MAX__ 4294967295U +// LA32: #define __UINTPTR_TYPE__ unsigned int +// LA32: #define __UINTPTR_WIDTH__ 32 +// LA32: #define __UINT_FAST16_FMTX__ "hX" +// LA32: #define __UINT_FAST16_FMTo__ "ho" +// LA32: #define __UINT_FAST16_FMTu__ "hu" +// LA32: #define __UINT_FAST16_FMTx__ "hx" +// LA32: #define __UINT_FAST16_MAX__ 65535 +// TODO: LoongArch GCC defines UINT_FAST16 to be long unsigned int +// LA32: #define __UINT_FAST16_TYPE__ unsigned short +// LA32: #define __UINT_FAST32_FMTX__ "X" +// LA32: #define __UINT_FAST32_FMTo__ "o" +// LA32: #define __UINT_FAST32_FMTu__ "u" +// LA32: #define __UINT_FAST32_FMTx__ "x" +// LA32: #define __UINT_FAST32_MAX__ 4294967295U +// LA32: #define __UINT_FAST32_TYPE__ unsigned int +// LA32: #define __UINT_FAST64_FMTX__ "llX" +// LA32: #define __UINT_FAST64_FMTo__ "llo" +// LA32: #define __UINT_FAST64_FMTu__ "llu" +// LA32: #define __UINT_FAST64_FMTx__ "llx" +// LA32: #define __UINT_FAST64_MAX__ 18446744073709551615ULL +// LA32: #define __UINT_FAST64_TYPE__ long long unsigned int +// LA32: #define __UINT_FAST8_FMTX__ "hhX" +// LA32: #define __UINT_FAST8_FMTo__ "hho" +// LA32: #define __UINT_FAST8_FMTu__ 
"hhu" +// LA32: #define __UINT_FAST8_FMTx__ "hhx" +// LA32: #define __UINT_FAST8_MAX__ 255 +// LA32: #define __UINT_FAST8_TYPE__ unsigned char +// LA32: #define __UINT_LEAST16_FMTX__ "hX" +// LA32: #define __UINT_LEAST16_FMTo__ "ho" +// LA32: #define __UINT_LEAST16_FMTu__ "hu" +// LA32: #define __UINT_LEAST16_FMTx__ "hx" +// LA32: #define __UINT_LEAST16_MAX__ 65535 +// LA32: #define __UINT_LEAST16_TYPE__ unsigned short +// LA32: #define __UINT_LEAST32_FMTX__ "X" +// LA32: #define __UINT_LEAST32_FMTo__ "o" +// LA32: #define __UINT_LEAST32_FMTu__ "u" +// LA32: #define __UINT_LEAST32_FMTx__ "x" +// LA32: #define __UINT_LEAST32_MAX__ 4294967295U +// LA32: #define __UINT_LEAST32_TYPE__ unsigned int +// LA32: #define __UINT_LEAST64_FMTX__ "llX" +// LA32: #define __UINT_LEAST64_FMTo__ "llo" +// LA32: #define __UINT_LEAST64_FMTu__ "llu" +// LA32: #define __UINT_LEAST64_FMTx__ "llx" +// LA32: #define __UINT_LEAST64_MAX__ 18446744073709551615ULL +// LA32: #define __UINT_LEAST64_TYPE__ long long unsigned int +// LA32: #define __UINT_LEAST8_FMTX__ "hhX" +// LA32: #define __UINT_LEAST8_FMTo__ "hho" +// LA32: #define __UINT_LEAST8_FMTu__ "hhu" +// LA32: #define __UINT_LEAST8_FMTx__ "hhx" +// LA32: #define __UINT_LEAST8_MAX__ 255 +// LA32: #define __UINT_LEAST8_TYPE__ unsigned char +// LA32: #define __USER_LABEL_PREFIX__ +// LA32: #define __WCHAR_MAX__ 2147483647 +// LA32: #define __WCHAR_TYPE__ int +// LA32: #define __WCHAR_WIDTH__ 32 +// LA32: #define __WINT_MAX__ 4294967295U +// LA32: #define __WINT_TYPE__ unsigned int +// LA32: #define __WINT_UNSIGNED__ 1 +// LA32: #define __WINT_WIDTH__ 32 +// LA32-LINUX: #define __gnu_linux__ 1 +// LA32-LINUX: #define __linux 1 +// LA32-LINUX: #define __linux__ 1 +// LA32-NOT: #define __loongarch64 1 +// LA32: #define __loongarch__ 1 +// LA32-LINUX: #define __unix 1 +// LA32-LINUX: #define __unix__ 1 +// LA32-LINUX: #define linux 1 +// LA32-LINUX: #define unix 1 + +// LA64: #define _LP64 1 +// LA64: #define __ATOMIC_ACQUIRE 2 +// LA64-NEXT: 
#define __ATOMIC_ACQ_REL 4 +// LA64-NEXT: #define __ATOMIC_CONSUME 1 +// LA64-NEXT: #define __ATOMIC_RELAXED 0 +// LA64-NEXT: #define __ATOMIC_RELEASE 3 +// LA64-NEXT: #define __ATOMIC_SEQ_CST 5 +// LA64: #define __BIGGEST_ALIGNMENT__ 16 +// LA64: #define __BITINT_MAXWIDTH__ 128 +// LA64: #define __BOOL_WIDTH__ 8 +// LA64: #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +// LA64: #define __CHAR16_TYPE__ unsigned short +// LA64: #define __CHAR32_TYPE__ unsigned int +// LA64: #define __CHAR_BIT__ 8 +// LA64: #define __CLANG_ATOMIC_BOOL_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_CHAR16_T_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_CHAR32_T_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_CHAR_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_INT_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_LLONG_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_LONG_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_POINTER_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_SHORT_LOCK_FREE 2 +// LA64: #define __CLANG_ATOMIC_WCHAR_T_LOCK_FREE 2 +// LA64: #define __DBL_DECIMAL_DIG__ 17 +// LA64: #define __DBL_DENORM_MIN__ 4.9406564584124654e-324 +// LA64: #define __DBL_DIG__ 15 +// LA64: #define __DBL_EPSILON__ 2.2204460492503131e-16 +// LA64: #define __DBL_HAS_DENORM__ 1 +// LA64: #define __DBL_HAS_INFINITY__ 1 +// LA64: #define __DBL_HAS_QUIET_NAN__ 1 +// LA64: #define __DBL_MANT_DIG__ 53 +// LA64: #define __DBL_MAX_10_EXP__ 308 +// LA64: #define __DBL_MAX_EXP__ 1024 +// LA64: #define __DBL_MAX__ 1.7976931348623157e+308 +// LA64: #define __DBL_MIN_10_EXP__ (-307) +// LA64: #define __DBL_MIN_EXP__ (-1021) +// LA64: #define __DBL_MIN__ 2.2250738585072014e-308 +// LA64: #define __DECIMAL_DIG__ __LDBL_DECIMAL_DIG__ +// LA64: #define __FLT_DECIMAL_DIG__ 9 +// LA64: #define __FLT_DENORM_MIN__ 1.40129846e-45F +// LA64: #define __FLT_DIG__ 6 +// LA64: #define __FLT_EPSILON__ 1.19209290e-7F +// LA64: #define __FLT_HAS_DENORM__ 1 +// LA64: #define __FLT_HAS_INFINITY__ 1 +// LA64: #define __FLT_HAS_QUIET_NAN__ 1 +// 
LA64: #define __FLT_MANT_DIG__ 24 +// LA64: #define __FLT_MAX_10_EXP__ 38 +// LA64: #define __FLT_MAX_EXP__ 128 +// LA64: #define __FLT_MAX__ 3.40282347e+38F +// LA64: #define __FLT_MIN_10_EXP__ (-37) +// LA64: #define __FLT_MIN_EXP__ (-125) +// LA64: #define __FLT_MIN__ 1.17549435e-38F +// LA64: #define __FLT_RADIX__ 2 +// LA64: #define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_INT_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// LA64: #define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// LA64: #define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// LA64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 +// LA64: #define __INT16_C_SUFFIX__ +// LA64: #define __INT16_FMTd__ "hd" +// LA64: #define __INT16_FMTi__ "hi" +// LA64: #define __INT16_MAX__ 32767 +// LA64: #define __INT16_TYPE__ short +// LA64: #define __INT32_C_SUFFIX__ +// LA64: #define __INT32_FMTd__ "d" +// LA64: #define __INT32_FMTi__ "i" +// LA64: #define __INT32_MAX__ 2147483647 +// LA64: #define __INT32_TYPE__ int +// LA64: #define __INT64_C_SUFFIX__ L +// LA64: #define __INT64_FMTd__ "ld" +// LA64: #define __INT64_FMTi__ "li" +// LA64: #define __INT64_MAX__ 9223372036854775807L +// LA64: #define __INT64_TYPE__ long int +// LA64: #define __INT8_C_SUFFIX__ +// LA64: #define __INT8_FMTd__ "hhd" +// LA64: #define __INT8_FMTi__ "hhi" +// LA64: #define __INT8_MAX__ 127 +// LA64: #define __INT8_TYPE__ signed char +// LA64: #define __INTMAX_C_SUFFIX__ L +// LA64: #define __INTMAX_FMTd__ "ld" +// LA64: #define __INTMAX_FMTi__ "li" +// 
LA64: #define __INTMAX_MAX__ 9223372036854775807L +// LA64: #define __INTMAX_TYPE__ long int +// LA64: #define __INTMAX_WIDTH__ 64 +// LA64: #define __INTPTR_FMTd__ "ld" +// LA64: #define __INTPTR_FMTi__ "li" +// LA64: #define __INTPTR_MAX__ 9223372036854775807L +// LA64: #define __INTPTR_TYPE__ long int +// LA64: #define __INTPTR_WIDTH__ 64 +// LA64: #define __INT_FAST16_FMTd__ "hd" +// LA64: #define __INT_FAST16_FMTi__ "hi" +// LA64: #define __INT_FAST16_MAX__ 32767 +// LA64: #define __INT_FAST16_TYPE__ short +// LA64: #define __INT_FAST16_WIDTH__ 16 +// LA64: #define __INT_FAST32_FMTd__ "d" +// LA64: #define __INT_FAST32_FMTi__ "i" +// LA64: #define __INT_FAST32_MAX__ 2147483647 +// LA64: #define __INT_FAST32_TYPE__ int +// LA64: #define __INT_FAST32_WIDTH__ 32 +// LA64: #define __INT_FAST64_FMTd__ "ld" +// LA64: #define __INT_FAST64_FMTi__ "li" +// LA64: #define __INT_FAST64_MAX__ 9223372036854775807L +// LA64: #define __INT_FAST64_TYPE__ long int +// LA64: #define __INT_FAST64_WIDTH__ 64 +// LA64: #define __INT_FAST8_FMTd__ "hhd" +// LA64: #define __INT_FAST8_FMTi__ "hhi" +// LA64: #define __INT_FAST8_MAX__ 127 +// LA64: #define __INT_FAST8_TYPE__ signed char +// LA64: #define __INT_FAST8_WIDTH__ 8 +// LA64: #define __INT_LEAST16_FMTd__ "hd" +// LA64: #define __INT_LEAST16_FMTi__ "hi" +// LA64: #define __INT_LEAST16_MAX__ 32767 +// LA64: #define __INT_LEAST16_TYPE__ short +// LA64: #define __INT_LEAST16_WIDTH__ 16 +// LA64: #define __INT_LEAST32_FMTd__ "d" +// LA64: #define __INT_LEAST32_FMTi__ "i" +// LA64: #define __INT_LEAST32_MAX__ 2147483647 +// LA64: #define __INT_LEAST32_TYPE__ int +// LA64: #define __INT_LEAST32_WIDTH__ 32 +// LA64: #define __INT_LEAST64_FMTd__ "ld" +// LA64: #define __INT_LEAST64_FMTi__ "li" +// LA64: #define __INT_LEAST64_MAX__ 9223372036854775807L +// LA64: #define __INT_LEAST64_TYPE__ long int +// LA64: #define __INT_LEAST64_WIDTH__ 64 +// LA64: #define __INT_LEAST8_FMTd__ "hhd" +// LA64: #define __INT_LEAST8_FMTi__ "hhi" +// LA64: 
#define __INT_LEAST8_MAX__ 127 +// LA64: #define __INT_LEAST8_TYPE__ signed char +// LA64: #define __INT_LEAST8_WIDTH__ 8 +// LA64: #define __INT_MAX__ 2147483647 +// LA64: #define __INT_WIDTH__ 32 +// LA64: #define __LDBL_DECIMAL_DIG__ 36 +// LA64: #define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L +// LA64: #define __LDBL_DIG__ 33 +// LA64: #define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L +// LA64: #define __LDBL_HAS_DENORM__ 1 +// LA64: #define __LDBL_HAS_INFINITY__ 1 +// LA64: #define __LDBL_HAS_QUIET_NAN__ 1 +// LA64: #define __LDBL_MANT_DIG__ 113 +// LA64: #define __LDBL_MAX_10_EXP__ 4932 +// LA64: #define __LDBL_MAX_EXP__ 16384 +// LA64: #define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L +// LA64: #define __LDBL_MIN_10_EXP__ (-4931) +// LA64: #define __LDBL_MIN_EXP__ (-16381) +// LA64: #define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L +// LA64: #define __LITTLE_ENDIAN__ 1 +// LA64: #define __LLONG_WIDTH__ 64 +// LA64: #define __LONG_LONG_MAX__ 9223372036854775807LL +// LA64: #define __LONG_MAX__ 9223372036854775807L +// LA64: #define __LONG_WIDTH__ 64 +// LA64: #define __LP64__ 1 +// LA64: #define __NO_INLINE__ 1 +// LA64: #define __NO_MATH_ERRNO__ 1 +// LA64: #define __OBJC_BOOL_IS_BOOL 0 +// LA64: #define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3 +// LA64: #define __OPENCL_MEMORY_SCOPE_DEVICE 2 +// LA64: #define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4 +// LA64: #define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 +// LA64: #define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 +// LA64: #define __POINTER_WIDTH__ 64 +// LA64: #define __PRAGMA_REDEFINE_EXTNAME 1 +// LA64: #define __PTRDIFF_FMTd__ "ld" +// LA64: #define __PTRDIFF_FMTi__ "li" +// LA64: #define __PTRDIFF_MAX__ 9223372036854775807L +// LA64: #define __PTRDIFF_TYPE__ long int +// LA64: #define __PTRDIFF_WIDTH__ 64 +// LA64: #define __SCHAR_MAX__ 127 +// LA64: #define __SHRT_MAX__ 32767 +// LA64: #define __SHRT_WIDTH__ 16 +// LA64: #define 
__SIG_ATOMIC_MAX__ 2147483647 +// LA64: #define __SIG_ATOMIC_WIDTH__ 32 +// LA64: #define __SIZEOF_DOUBLE__ 8 +// LA64: #define __SIZEOF_FLOAT__ 4 +// LA64-INT128: #define __SIZEOF_INT128__ 16 +// LA64: #define __SIZEOF_INT__ 4 +// LA64: #define __SIZEOF_LONG_DOUBLE__ 16 +// LA64: #define __SIZEOF_LONG_LONG__ 8 +// LA64: #define __SIZEOF_LONG__ 8 +// LA64: #define __SIZEOF_POINTER__ 8 +// LA64: #define __SIZEOF_PTRDIFF_T__ 8 +// LA64: #define __SIZEOF_SHORT__ 2 +// LA64: #define __SIZEOF_SIZE_T__ 8 +// LA64: #define __SIZEOF_WCHAR_T__ 4 +// LA64: #define __SIZEOF_WINT_T__ 4 +// LA64: #define __SIZE_FMTX__ "lX" +// LA64: #define __SIZE_FMTo__ "lo" +// LA64: #define __SIZE_FMTu__ "lu" +// LA64: #define __SIZE_FMTx__ "lx" +// LA64: #define __SIZE_MAX__ 18446744073709551615UL +// LA64: #define __SIZE_TYPE__ long unsigned int +// LA64: #define __SIZE_WIDTH__ 64 +// LA64: #define __STDC_HOSTED__ 0 +// LA64: #define __STDC_UTF_16__ 1 +// LA64: #define __STDC_UTF_32__ 1 +// LA64: #define __STDC_VERSION__ 201710L +// LA64: #define __STDC__ 1 +// LA64: #define __UINT16_C_SUFFIX__ +// LA64: #define __UINT16_FMTX__ "hX" +// LA64: #define __UINT16_FMTo__ "ho" +// LA64: #define __UINT16_FMTu__ "hu" +// LA64: #define __UINT16_FMTx__ "hx" +// LA64: #define __UINT16_MAX__ 65535 +// LA64: #define __UINT16_TYPE__ unsigned short +// LA64: #define __UINT32_C_SUFFIX__ U +// LA64: #define __UINT32_FMTX__ "X" +// LA64: #define __UINT32_FMTo__ "o" +// LA64: #define __UINT32_FMTu__ "u" +// LA64: #define __UINT32_FMTx__ "x" +// LA64: #define __UINT32_MAX__ 4294967295U +// LA64: #define __UINT32_TYPE__ unsigned int +// LA64: #define __UINT64_C_SUFFIX__ UL +// LA64: #define __UINT64_FMTX__ "lX" +// LA64: #define __UINT64_FMTo__ "lo" +// LA64: #define __UINT64_FMTu__ "lu" +// LA64: #define __UINT64_FMTx__ "lx" +// LA64: #define __UINT64_MAX__ 18446744073709551615UL +// LA64: #define __UINT64_TYPE__ long unsigned int +// LA64: #define __UINT8_C_SUFFIX__ +// LA64: #define __UINT8_FMTX__ "hhX" +// 
LA64: #define __UINT8_FMTo__ "hho" +// LA64: #define __UINT8_FMTu__ "hhu" +// LA64: #define __UINT8_FMTx__ "hhx" +// LA64: #define __UINT8_MAX__ 255 +// LA64: #define __UINT8_TYPE__ unsigned char +// LA64: #define __UINTMAX_C_SUFFIX__ UL +// LA64: #define __UINTMAX_FMTX__ "lX" +// LA64: #define __UINTMAX_FMTo__ "lo" +// LA64: #define __UINTMAX_FMTu__ "lu" +// LA64: #define __UINTMAX_FMTx__ "lx" +// LA64: #define __UINTMAX_MAX__ 18446744073709551615UL +// LA64: #define __UINTMAX_TYPE__ long unsigned int +// LA64: #define __UINTMAX_WIDTH__ 64 +// LA64: #define __UINTPTR_FMTX__ "lX" +// LA64: #define __UINTPTR_FMTo__ "lo" +// LA64: #define __UINTPTR_FMTu__ "lu" +// LA64: #define __UINTPTR_FMTx__ "lx" +// LA64: #define __UINTPTR_MAX__ 18446744073709551615UL +// LA64: #define __UINTPTR_TYPE__ long unsigned int +// LA64: #define __UINTPTR_WIDTH__ 64 +// LA64: #define __UINT_FAST16_FMTX__ "hX" +// LA64: #define __UINT_FAST16_FMTo__ "ho" +// LA64: #define __UINT_FAST16_FMTu__ "hu" +// LA64: #define __UINT_FAST16_FMTx__ "hx" +// LA64: #define __UINT_FAST16_MAX__ 65535 +// TODO: LoongArch GCC defines UINT_FAST16 to be long unsigned int +// LA64: #define __UINT_FAST16_TYPE__ unsigned short +// LA64: #define __UINT_FAST32_FMTX__ "X" +// LA64: #define __UINT_FAST32_FMTo__ "o" +// LA64: #define __UINT_FAST32_FMTu__ "u" +// LA64: #define __UINT_FAST32_FMTx__ "x" +// LA64: #define __UINT_FAST32_MAX__ 4294967295U +// LA64: #define __UINT_FAST32_TYPE__ unsigned int +// LA64: #define __UINT_FAST64_FMTX__ "lX" +// LA64: #define __UINT_FAST64_FMTo__ "lo" +// LA64: #define __UINT_FAST64_FMTu__ "lu" +// LA64: #define __UINT_FAST64_FMTx__ "lx" +// LA64: #define __UINT_FAST64_MAX__ 18446744073709551615UL +// LA64: #define __UINT_FAST64_TYPE__ long unsigned int +// LA64: #define __UINT_FAST8_FMTX__ "hhX" +// LA64: #define __UINT_FAST8_FMTo__ "hho" +// LA64: #define __UINT_FAST8_FMTu__ "hhu" +// LA64: #define __UINT_FAST8_FMTx__ "hhx" +// LA64: #define __UINT_FAST8_MAX__ 255 +// LA64: 
#define __UINT_FAST8_TYPE__ unsigned char +// LA64: #define __UINT_LEAST16_FMTX__ "hX" +// LA64: #define __UINT_LEAST16_FMTo__ "ho" +// LA64: #define __UINT_LEAST16_FMTu__ "hu" +// LA64: #define __UINT_LEAST16_FMTx__ "hx" +// LA64: #define __UINT_LEAST16_MAX__ 65535 +// LA64: #define __UINT_LEAST16_TYPE__ unsigned short +// LA64: #define __UINT_LEAST32_FMTX__ "X" +// LA64: #define __UINT_LEAST32_FMTo__ "o" +// LA64: #define __UINT_LEAST32_FMTu__ "u" +// LA64: #define __UINT_LEAST32_FMTx__ "x" +// LA64: #define __UINT_LEAST32_MAX__ 4294967295U +// LA64: #define __UINT_LEAST32_TYPE__ unsigned int +// LA64: #define __UINT_LEAST64_FMTX__ "lX" +// LA64: #define __UINT_LEAST64_FMTo__ "lo" +// LA64: #define __UINT_LEAST64_FMTu__ "lu" +// LA64: #define __UINT_LEAST64_FMTx__ "lx" +// LA64: #define __UINT_LEAST64_MAX__ 18446744073709551615UL +// LA64: #define __UINT_LEAST64_TYPE__ long unsigned int +// LA64: #define __UINT_LEAST8_FMTX__ "hhX" +// LA64: #define __UINT_LEAST8_FMTo__ "hho" +// LA64: #define __UINT_LEAST8_FMTu__ "hhu" +// LA64: #define __UINT_LEAST8_FMTx__ "hhx" +// LA64: #define __UINT_LEAST8_MAX__ 255 +// LA64: #define __UINT_LEAST8_TYPE__ unsigned char +// LA64: #define __USER_LABEL_PREFIX__ +// LA64: #define __WCHAR_MAX__ 2147483647 +// LA64: #define __WCHAR_TYPE__ int +// LA64: #define __WCHAR_WIDTH__ 32 +// LA64: #define __WINT_MAX__ 4294967295U +// LA64: #define __WINT_TYPE__ unsigned int +// LA64: #define __WINT_UNSIGNED__ 1 +// LA64: #define __WINT_WIDTH__ 32 +// LA64-LINUX: #define __gnu_linux__ 1 +// LA64-LINUX: #define __linux 1 +// LA64-LINUX: #define __linux__ 1 +// LA64: #define __loongarch64 1 +// LA64: #define __loongarch__ 1 +// LA64-LINUX: #define __unix 1 +// LA64-LINUX: #define __unix__ 1 +// LA64-LINUX: #define linux 1 +// LA64-LINUX: #define unix 1 + + +/// Check __loongarch_{double,single,hard,soft}_float, __loongarch_{gr,fr}len, __loongarch_lp64. 
+ +// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32d -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32D %s +// RUN: %clang --target=loongarch32 -mdouble-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32D %s +// LA32-FPU64-ILP32D: #define __loongarch_double_float 1 +// LA32-FPU64-ILP32D: #define __loongarch_frlen 64 +// LA32-FPU64-ILP32D: #define __loongarch_grlen 32 +// LA32-FPU64-ILP32D: #define __loongarch_hard_float 1 +// LA32-FPU64-ILP32D-NOT: #define __loongarch_lp64 +// LA32-FPU64-ILP32D-NOT: #define __loongarch_single_float +// LA32-FPU64-ILP32D-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32f -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32F %s +// LA32-FPU64-ILP32F-NOT: #define __loongarch_double_float +// LA32-FPU64-ILP32F: #define __loongarch_frlen 64 +// LA32-FPU64-ILP32F: #define __loongarch_grlen 32 +// LA32-FPU64-ILP32F: #define __loongarch_hard_float 1 +// LA32-FPU64-ILP32F-NOT: #define __loongarch_lp64 +// LA32-FPU64-ILP32F: #define __loongarch_single_float 1 +// LA32-FPU64-ILP32F-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch32 -mfpu=64 -mabi=ilp32s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU64-ILP32S %s +// LA32-FPU64-ILP32S-NOT: #define __loongarch_double_float +// LA32-FPU64-ILP32S: #define __loongarch_frlen 64 +// LA32-FPU64-ILP32S: #define __loongarch_grlen 32 +// LA32-FPU64-ILP32S-NOT: #define __loongarch_hard_float +// LA32-FPU64-ILP32S-NOT: #define __loongarch_lp64 +// LA32-FPU64-ILP32S-NOT: #define __loongarch_single_float +// LA32-FPU64-ILP32S: #define __loongarch_soft_float 1 + +// RUN: %clang --target=loongarch32 -mfpu=32 -mabi=ilp32f -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32F %s +// RUN: %clang --target=loongarch32 
-msingle-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32F %s +// LA32-FPU32-ILP32F-NOT: #define __loongarch_double_float +// LA32-FPU32-ILP32F: #define __loongarch_frlen 32 +// LA32-FPU32-ILP32F: #define __loongarch_grlen 32 +// LA32-FPU32-ILP32F: #define __loongarch_hard_float 1 +// LA32-FPU32-ILP32F-NOT: #define __loongarch_lp64 +// LA32-FPU32-ILP32F: #define __loongarch_single_float 1 +// LA32-FPU32-ILP32F-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch32 -mfpu=32 -mabi=ilp32s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU32-ILP32S %s +// LA32-FPU32-ILP32S-NOT: #define __loongarch_double_float +// LA32-FPU32-ILP32S: #define __loongarch_frlen 32 +// LA32-FPU32-ILP32S: #define __loongarch_grlen 32 +// LA32-FPU32-ILP32S-NOT: #define __loongarch_hard_float +// LA32-FPU32-ILP32S-NOT: #define __loongarch_lp64 +// LA32-FPU32-ILP32S-NOT: #define __loongarch_single_float +// LA32-FPU32-ILP32S: #define __loongarch_soft_float 1 + +// RUN: %clang --target=loongarch32 -mfpu=0 -mabi=ilp32s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +// RUN: %clang --target=loongarch32 -mfpu=none -mabi=ilp32s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +// RUN: %clang --target=loongarch32 -msoft-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA32-FPU0-ILP32S %s +// LA32-FPU0-ILP32S-NOT: #define __loongarch_double_float +// LA32-FPU0-ILP32S: #define __loongarch_frlen 0 +// LA32-FPU0-ILP32S: #define __loongarch_grlen 32 +// LA32-FPU0-ILP32S-NOT: #define __loongarch_hard_float +// LA32-FPU0-ILP32S-NOT: #define __loongarch_lp64 +// LA32-FPU0-ILP32S-NOT: #define __loongarch_single_float +// LA32-FPU0-ILP32S: #define __loongarch_soft_float 1 + +// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64d -x c -E -dM %s -o - \ +// RUN: | FileCheck 
--match-full-lines --check-prefix=LA64-FPU64-LP64D %s +// RUN: %clang --target=loongarch64 -mdouble-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64D %s +// LA64-FPU64-LP64D: #define __loongarch_double_float 1 +// LA64-FPU64-LP64D: #define __loongarch_frlen 64 +// LA64-FPU64-LP64D: #define __loongarch_grlen 64 +// LA64-FPU64-LP64D: #define __loongarch_hard_float 1 +// LA64-FPU64-LP64D: #define __loongarch_lp64 1 +// LA64-FPU64-LP64D-NOT: #define __loongarch_single_float +// LA64-FPU64-LP64D-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64f -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64F %s +// LA64-FPU64-LP64F-NOT: #define __loongarch_double_float +// LA64-FPU64-LP64F: #define __loongarch_frlen 64 +// LA64-FPU64-LP64F: #define __loongarch_grlen 64 +// LA64-FPU64-LP64F: #define __loongarch_hard_float 1 +// LA64-FPU64-LP64F: #define __loongarch_lp64 1 +// LA64-FPU64-LP64F: #define __loongarch_single_float 1 +// LA64-FPU64-LP64F-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch64 -mfpu=64 -mabi=lp64s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU64-LP64S %s +// LA64-FPU64-LP64S-NOT: #define __loongarch_double_float +// LA64-FPU64-LP64S: #define __loongarch_frlen 64 +// LA64-FPU64-LP64S: #define __loongarch_grlen 64 +// LA64-FPU64-LP64S-NOT: #define __loongarch_hard_float +// LA64-FPU64-LP64S: #define __loongarch_lp64 1 +// LA64-FPU64-LP64S-NOT: #define __loongarch_single_float +// LA64-FPU64-LP64S: #define __loongarch_soft_float 1 + +// RUN: %clang --target=loongarch64 -mfpu=32 -mabi=lp64f -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64F %s +// RUN: %clang --target=loongarch64 -msingle-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64F %s +// LA64-FPU32-LP64F-NOT: #define 
__loongarch_double_float +// LA64-FPU32-LP64F: #define __loongarch_frlen 32 +// LA64-FPU32-LP64F: #define __loongarch_grlen 64 +// LA64-FPU32-LP64F: #define __loongarch_hard_float 1 +// LA64-FPU32-LP64F: #define __loongarch_lp64 1 +// LA64-FPU32-LP64F: #define __loongarch_single_float 1 +// LA64-FPU32-LP64F-NOT: #define __loongarch_soft_float + +// RUN: %clang --target=loongarch64 -mfpu=32 -mabi=lp64s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU32-LP64S %s +// LA64-FPU32-LP64S-NOT: #define __loongarch_double_float +// LA64-FPU32-LP64S: #define __loongarch_frlen 32 +// LA64-FPU32-LP64S: #define __loongarch_grlen 64 +// LA64-FPU32-LP64S-NOT: #define __loongarch_hard_float +// LA64-FPU32-LP64S: #define __loongarch_lp64 1 +// LA64-FPU32-LP64S-NOT: #define __loongarch_single_float +// LA64-FPU32-LP64S: #define __loongarch_soft_float 1 + +// RUN: %clang --target=loongarch64 -mfpu=0 -mabi=lp64s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +// RUN: %clang --target=loongarch64 -mfpu=none -mabi=lp64s -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +// RUN: %clang --target=loongarch64 -msoft-float -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=LA64-FPU0-LP64S %s +// LA64-FPU0-LP64S-NOT: #define __loongarch_double_float +// LA64-FPU0-LP64S: #define __loongarch_frlen 0 +// LA64-FPU0-LP64S: #define __loongarch_grlen 64 +// LA64-FPU0-LP64S-NOT: #define __loongarch_hard_float +// LA64-FPU0-LP64S: #define __loongarch_lp64 1 +// LA64-FPU0-LP64S-NOT: #define __loongarch_single_float +// LA64-FPU0-LP64S: #define __loongarch_soft_float 1 + +/// Check __loongarch_arch and __loongarch_tune. 
+ +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=la464 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=loongarch64 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la464 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la464 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 -mtune=loongarch64 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=loongarch64 %s + +// ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" +// ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" diff --git a/clang/test/Preprocessor/ohos.c b/clang/test/Preprocessor/ohos.c index 0c435c7ed5ab471e97d2358640d8ef838cd47955..7773f5fd6916a557587b4e082ed48b8e23b9d434 100644 --- a/clang/test/Preprocessor/ohos.c +++ b/clang/test/Preprocessor/ohos.c @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=arm-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=ARM-OHOS-CXX // RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=aarch64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=ARM64-OHOS-CXX // RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=riscv64-linux-ohos < 
/dev/null | FileCheck %s -match-full-lines -check-prefix=RISCV64-OHOS-CXX +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=loongarch64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=LOONGARCH64-OHOS-CXX // RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=mipsel-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=MIPSEL-OHOS-CXX // RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=x86_64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=X86_64-OHOS-CXX // RUN: %clang_cc1 -E -dM -ffreestanding -triple=arm-linux-ohos < /dev/null | FileCheck %s -check-prefix=OHOS-DEFS @@ -8,6 +9,7 @@ // ARM-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U // ARM64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL // RISCV64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL +// LOONGARCH64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL // MIPSEL-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U // X86_64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL // OHOS-DEFS: __OHOS_FAMILY__ diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 0ffa2739e5df4457f607745deb6d5b9418e35a88..02a0dc72f787e765a30999109318b128bc62e710 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -3750,3 +3750,20 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_R600_FP64 // CHECK_R600_FP64-DAG: #define __R600__ 1 // CHECK_R600_FP64-DAG: #define __HAS_FMAF__ 1 + +// Begin LoongArch tests ---------------- + +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target loongarch32-unknown-linux-gnu \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_LA32_ATOMICS +// CHECK_LA32_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK_LA32_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK_LA32_ATOMICS: #define 
__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 + +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target loongarch64-unknown-linux-gnu \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_LA64_ATOMICS +// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK_LA64_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 diff --git a/clang/test/Sema/patchable-function-entry-attr.cpp b/clang/test/Sema/patchable-function-entry-attr.cpp index 3dd0504987306e9986ab6f7160eb0fd367171db1..9134c851da588c44af65507f57d404ca6968af91 100644 --- a/clang/test/Sema/patchable-function-entry-attr.cpp +++ b/clang/test/Sema/patchable-function-entry-attr.cpp @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -triple aarch64_be -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple i386 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple x86_64 -fsyntax-only -verify=silence %s +// RUN: %clang_cc1 -triple loongarch32 -fsyntax-only -verify=silence %s +// RUN: %clang_cc1 -triple loongarch64 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple riscv32 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple riscv64 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify %s diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index 4612e4d8b9af7d41a9ab925f52224e4263e7f4a9..4ac8905cdba8c77061240dbe2499d9844d38bcd0 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -52,25 +52,27 @@ if(APPLE) set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64}) else() set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} - ${PPC64} ${S390X} ${RISCV64} ${HEXAGON}) + ${PPC64} ${S390X} ${RISCV64} ${HEXAGON} ${LOONGARCH64}) endif() 
set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} - ${RISCV32} ${RISCV64}) -set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) + ${RISCV32} ${RISCV64} ${LOONGARCH64}) +set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} + ${LOONGARCH64}) set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} - ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}) + ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} + ${LOONGARCH64}) set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64} - ${HEXAGON}) + ${HEXAGON} ${LOONGARCH64}) set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64} ${HEXAGON}) set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} - ${MIPS64} ${PPC64} ${HEXAGON}) + ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64}) set(ALL_SCUDO_STANDALONE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} - ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON}) + ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64}) if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64}) else() diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake index 8675f5689b99fb6f9c271b56b4aec3579385c053..edd9d40189685d34117e213af0e6d2666aa9059a 100644 --- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake @@ -151,6 +151,7 @@ macro(detect_target_arch) check_symbol_exists(__aarch64__ "" __AARCH64) check_symbol_exists(__x86_64__ "" __X86_64) check_symbol_exists(__i386__ "" __I386) + check_symbol_exists(__loongarch__ "" __LOONGARCH) check_symbol_exists(__mips__ "" __MIPS) check_symbol_exists(__mips64__ "" 
__MIPS64) check_symbol_exists(__powerpc__ "" __PPC) @@ -179,6 +180,14 @@ macro(detect_target_arch) endif() elseif(__I386) add_default_target_arch(i386) + elseif(__LOONGARCH) + if(CMAKE_SIZEOF_VOID_P EQUAL "4") + add_default_target_arch(loongarch32) + elseif(CMAKE_SIZEOF_VOID_P EQUAL "8") + add_default_target_arch(loongarch64) + else() + message(FATAL_ERROR "Unsupported pointer size for LoongArch") + endif() elseif(__MIPS64) # must be checked before __MIPS add_default_target_arch(mips64) elseif(__MIPS) diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index f61b962bdb248d64ccddf5bce85a6e6e2748fb0d..b0d2210b0ed24620a397dbf1ce2c88793b836a29 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -208,6 +208,8 @@ macro(test_targets) test_target_arch(x86_64 "" "") endif() endif() + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64") + test_target_arch(loongarch64 "" "") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le") test_target_arch(powerpc64le "" "-m64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc") diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 439abc713bad9e5b401cc8243dc9b9eb18935680..f6873736c2d97e6ed73da8bfc1900e178d7de868 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -50,6 +50,7 @@ set(AVR avr) set(HEXAGON hexagon) set(X86 i386) set(X86_64 x86_64) +set(LOONGARCH64 loongarch64) set(MIPS32 mips mipsel) set(MIPS64 mips64 mips64el) set(PPC32 powerpc powerpcspe) @@ -72,7 +73,7 @@ set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR} ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} - ${WASM32} ${WASM64} ${VE}) + ${WASM32} ${WASM64} ${VE} ${LOONGARCH64}) include(CompilerRTUtils) include(CompilerRTDarwinUtils) diff --git 
a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 565ea795ac0e940da16a2fdc31a23e3490016b36..55bddb93842009af5d0b156bd43895bb68cdbe87 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -71,6 +71,9 @@ check_c_compiler_flag(-ffreestanding COMPILER_RT_HAS_FFREESTANDING_FLAG) check_c_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG) check_c_compiler_flag(-std=c11 COMPILER_RT_HAS_STD_C11_FLAG) check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG) +# OHOS_LOCAL begin +check_c_compiler_flag(-mcmodel=medium COMPILER_RT_HAS_LOONGARCH_MCMODEL_FLAG) +# OHOS_LOCAL end check_cxx_compiler_flag(-fPIC COMPILER_RT_HAS_FPIC_FLAG) check_cxx_compiler_flag(-fPIE COMPILER_RT_HAS_FPIE_FLAG) check_cxx_compiler_flag(-fno-builtin COMPILER_RT_HAS_FNO_BUILTIN_FLAG) diff --git a/compiler-rt/cmake/crt-config-ix.cmake b/compiler-rt/cmake/crt-config-ix.cmake index f737e4eba82248617a989a9b1aadc102ad2d062e..2a7bfc2ff5bc6d8abe8f8c7eb5aed88ab4bd8e26 100644 --- a/compiler-rt/cmake/crt-config-ix.cmake +++ b/compiler-rt/cmake/crt-config-ix.cmake @@ -23,6 +23,7 @@ set(ARM32 arm armhf) set(HEXAGON hexagon) set(X86 i386) set(X86_64 x86_64) +set(LOONGARCH64 loongarch64) set(PPC32 powerpc powerpcspe) set(PPC64 powerpc64 powerpc64le) set(RISCV32 riscv32) @@ -30,7 +31,7 @@ set(RISCV64 riscv64) set(VE ve) set(ALL_CRT_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} - ${PPC64} ${MIPS32} ${RISCV32} ${RISCV64} ${VE} ${HEXAGON}) + ${PPC64} ${MIPS32} ${RISCV32} ${RISCV64} ${VE} ${HEXAGON} ${LOONGARCH64}) include(CompilerRTUtils) diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 35727a96497dcbbaaf692b5e8e74efe4259a785c..c4bf087ea17f08f0b3082c523a82d2505b7b535d 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -114,7 +114,7 @@ void InitializePlatformInterceptors(); #if SANITIZER_LINUX && 
\ (defined(__arm__) || defined(__aarch64__) || defined(__i386__) || \ - defined(__x86_64__) || SANITIZER_RISCV64) + defined(__x86_64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) # define ASAN_INTERCEPT_VFORK 1 #else # define ASAN_INTERCEPT_VFORK 0 diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index ab04b1c67e5a8afd93711db3d7e15a2864db3f23..4d5312858fdbbff904f0c307fde136197393309e 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -50,7 +50,7 @@ def fix_filename(file_name): def is_valid_arch(s): return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390", - "riscv64"] + "riscv64", "loongarch64"] def guess_arch(addr): # Guess which arch we're running. 10 = len('0x') + 8 hex digits. diff --git a/compiler-rt/lib/asan/tests/asan_test.cpp b/compiler-rt/lib/asan/tests/asan_test.cpp index e392ea90dbc2aa0f2b0d7ea2b946f85f7f0ba143..4dc65c0f6144b5309a73ce151a97d38648bacf5b 100644 --- a/compiler-rt/lib/asan/tests/asan_test.cpp +++ b/compiler-rt/lib/asan/tests/asan_test.cpp @@ -623,7 +623,7 @@ NOINLINE void SigLongJmpFunc1(sigjmp_buf buf) { #if !defined(__ANDROID__) && !defined(__arm__) && !defined(__aarch64__) && \ !defined(__mips__) && !defined(__mips64) && !defined(__s390__) && \ - !defined(__riscv) + !defined(__riscv) && !defined(__loongarch__) NOINLINE void BuiltinLongJmpFunc1(jmp_buf buf) { // create three red zones for these two stack objects. 
int a; @@ -646,9 +646,9 @@ TEST(AddressSanitizer, BuiltinLongJmpTest) { } } #endif // !defined(__ANDROID__) && !defined(__arm__) && - // !defined(__aarch64__) && !defined(__mips__) - // !defined(__mips64) && !defined(__s390__) - // !defined(__riscv) + // !defined(__aarch64__) && !defined(__mips__) && + // !defined(__mips64) && !defined(__s390__) && + // !defined(__riscv) && !defined(__loongarch__) TEST(AddressSanitizer, UnderscopeLongJmpTest) { static jmp_buf buf; diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index ec668e294d6d72d0d9dad5a34a62dc02366fc2be..a6c827f887f38010c5f52b36db149eaec92910ac 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -620,6 +620,14 @@ set(hexagon_SOURCES ${GENERIC_TF_SOURCES} ) +set(loongarch_SOURCES + loongarch/fp_mode.c + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} +) +set(loongarch64_SOURCES + ${loongarch_SOURCES} +) set(mips_SOURCES ${GENERIC_SOURCES}) set(mipsel_SOURCES ${mips_SOURCES}) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 9816940b504a7895b68316c3535148607865b46a..bcc5922e073b3a7fd5c5c3f7b422c2b4f05fecc9 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -91,6 +91,8 @@ void __clear_cache(void *start, void *end) { #else compilerrt_abort(); #endif +#elif defined(__linux__) && defined(__loongarch__) + __asm__ volatile("ibar 0"); #elif defined(__linux__) && defined(__mips__) const uintptr_t start_int = (uintptr_t)start; const uintptr_t end_int = (uintptr_t)end; diff --git a/compiler-rt/lib/builtins/loongarch/fp_mode.c b/compiler-rt/lib/builtins/loongarch/fp_mode.c new file mode 100644 index 0000000000000000000000000000000000000000..31877fb02bd5b648283e64b3531a3d8c01701afc --- /dev/null +++ b/compiler-rt/lib/builtins/loongarch/fp_mode.c @@ -0,0 +1,59 @@ +//=== lib/builtins/loongarch/fp_mode.c - Floating-point mode utilities -*- C 
-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../fp_mode.h" + +#define LOONGARCH_TONEAREST 0x0000 +#define LOONGARCH_TOWARDZERO 0x0100 +#define LOONGARCH_UPWARD 0x0200 +#define LOONGARCH_DOWNWARD 0x0300 + +#define LOONGARCH_RMODE_MASK (LOONGARCH_TONEAREST | LOONGARCH_TOWARDZERO | \ + LOONGARCH_UPWARD | LOONGARCH_DOWNWARD) + +#define LOONGARCH_INEXACT 0x10000 + +CRT_FE_ROUND_MODE __fe_getround(void) { +#if __loongarch_frlen != 0 + int fcsr; +# ifdef __clang__ + __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r" (fcsr)); +# else + __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r" (fcsr)); +# endif + fcsr &= LOONGARCH_RMODE_MASK; + switch (fcsr) { + case LOONGARCH_TOWARDZERO: + return CRT_FE_TOWARDZERO; + case LOONGARCH_DOWNWARD: + return CRT_FE_DOWNWARD; + case LOONGARCH_UPWARD: + return CRT_FE_UPWARD; + case LOONGARCH_TONEAREST: + default: + return CRT_FE_TONEAREST; + } +#else + return CRT_FE_TONEAREST; +#endif +} + +int __fe_raise_inexact(void) { +#if __loongarch_frlen != 0 + int fcsr; +# ifdef __clang__ + __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r" (fcsr)); + __asm__ __volatile__( + "movgr2fcsr $fcsr0, %0" :: "r" (fcsr | LOONGARCH_INEXACT)); +# else + __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r" (fcsr)); + __asm__ __volatile__( + "movgr2fcsr $r0, %0" :: "r" (fcsr | LOONGARCH_INEXACT)); +# endif +#endif + return 0; +} diff --git a/compiler-rt/lib/crt/CMakeLists.txt b/compiler-rt/lib/crt/CMakeLists.txt index 60b30566b7925a381ff33e3512e8152c6670b829..34fdf9d818bd98086e1bc063f4a716bd59f340e2 100644 --- a/compiler-rt/lib/crt/CMakeLists.txt +++ b/compiler-rt/lib/crt/CMakeLists.txt @@ -42,6 +42,11 @@ if(COMPILER_RT_HAS_CRT) append_list_if(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY 
-DEH_USE_FRAME_REGISTRY CRT_CFLAGS) append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC CRT_CFLAGS) append_list_if(COMPILER_RT_HAS_WNO_PEDANTIC -Wno-pedantic CRT_CFLAGS) + # OHOS_LOCAL begin + if ("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64") + append_list_if(COMPILER_RT_HAS_LOONGARCH_MCMODEL_FLAG -mcmodel=medium CRT_CFLAGS) + endif() + # OHOS_LOCAL end if (COMPILER_RT_HAS_FCF_PROTECTION_FLAG) append_list_if(COMPILER_RT_ENABLE_CET -fcf-protection=full CRT_CFLAGS) endif() diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 94bb3cca0083f0d0adc996df4ce7cc16732068b2..fd9b6e6c5c8e5766e0aff3c0fb831b8cb569a91e 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -254,6 +254,9 @@ static inline bool MaybeUserPointer(uptr p) { # elif defined(__aarch64__) // Accept up to 48 bit VMA. return ((p >> 48) == 0); +# elif defined(__loongarch_lp64) + // Allow 47-bit user-space VMA at current. + return ((p >> 47) == 0); # else return true; # endif diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h index 2fc038b8fd14a2aedd2adc9e1700eaaad12f0e47..ff544a661a1e55776cf6751de52c59de84c754a0 100644 --- a/compiler-rt/lib/lsan/lsan_common.h +++ b/compiler-rt/lib/lsan/lsan_common.h @@ -43,6 +43,8 @@ # define CAN_SANITIZE_LEAKS 1 #elif defined(__arm__) && SANITIZER_LINUX # define CAN_SANITIZE_LEAKS 1 +#elif SANITIZER_LOONGARCH64 && SANITIZER_LINUX +# define CAN_SANITIZE_LEAKS 1 #elif SANITIZER_RISCV64 && SANITIZER_LINUX # define CAN_SANITIZE_LEAKS 1 #elif SANITIZER_NETBSD || SANITIZER_FUCHSIA diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 787687db26d0bf6197ed965cf8cac761dbf93f06..ed20388016fb10c6908a5315b7a2ba3b6512b97c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -709,6 +709,7 @@ enum ModuleArch { 
kModuleArchARMV7S, kModuleArchARMV7K, kModuleArchARM64, + kModuleArchLoongArch64, kModuleArchRISCV64, kModuleArchHexagon }; @@ -781,6 +782,8 @@ inline const char *ModuleArchToString(ModuleArch arch) { return "armv7k"; case kModuleArchARM64: return "arm64"; + case kModuleArchLoongArch64: + return "loongarch64"; case kModuleArchRISCV64: return "riscv64"; case kModuleArchHexagon: diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S index 05192485d597170615a06fcb41d967134c92b060..68782acb379d134c47bc5e7cf94fb58af3c5392f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S @@ -5,12 +5,6 @@ ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA) ASM_HIDDEN(_ZN14__interception10real_vforkE) -.bss -.type _ZN14__interception10real_vforkE, @object -.size _ZN14__interception10real_vforkE, 8 -_ZN14__interception10real_vforkE: - .zero 8 - .text .globl ASM_WRAPPER_NAME(vfork) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index 7e7628ea0c1cb970b3285a3a2d3538f6f89bd5b1..2f7a2e86fd2a4006770f51fab85bcfb65b30ea15 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2512,7 +2512,7 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { # if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ - SANITIZER_RISCV64) + defined(__loongarch__) || SANITIZER_RISCV64) if (data) { if (request == ptrace_setregs) { PRE_READ((void *)data, struct_user_regs_struct_sz); @@ 
-2534,7 +2534,7 @@ POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { # if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ - SANITIZER_RISCV64) + defined(__loongarch__) || SANITIZER_RISCV64) if (res >= 0 && data) { // Note that this is different from the interceptor in // sanitizer_common_interceptors.inc. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7d1315255005f08b30826e3aff8368d0768a174b..8e00a70109e0957d750531a0a36a833a336a153b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -14,6 +14,11 @@ #include "sanitizer_interface_internal.h" // OHOS_LOCAL #include "sanitizer_platform.h" +#if defined(__loongarch__) +# define SC_ADDRERR_RD (1 << 30) +# define SC_ADDRERR_WR (1 << 31) +#endif + #if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \ SANITIZER_SOLARIS @@ -444,7 +449,7 @@ uptr internal_fstat(fd_t fd, void *buf) { return res; # elif SANITIZER_LINUX && defined(__loongarch__) struct statx bufx; - int res = internal_syscall(SYSCALL(statx), fd, 0, AT_EMPTY_PATH, + int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH, STATX_BASIC_STATS, (uptr)&bufx); statx_to_stat(&bufx, (struct stat *)buf); return res; @@ -1127,7 +1132,7 @@ uptr GetMaxVirtualAddress() { #if SANITIZER_NETBSD && defined(__x86_64__) return 0x7f7ffffff000ULL; // (0x00007f8000000000 - PAGE_SIZE) #elif SANITIZER_WORDSIZE == 64 -# if defined(__powerpc64__) || defined(__aarch64__) +# if defined(__powerpc64__) || defined(__aarch64__) || defined(__loongarch__) // On PowerPC64 we have two different address space layouts: 44- and 46-bit. // We somehow need to figure out which one we are using now and choose // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL. 
@@ -1135,6 +1140,7 @@ uptr GetMaxVirtualAddress() { // of the address space, so simply checking the stack address is not enough. // This should (does) work for both PowerPC64 Endian modes. // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit. + // loongarch64 also has multiple address space layouts: default is 47-bit. return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; #elif SANITIZER_RISCV64 return (1ULL << 38) - 1; @@ -1523,6 +1529,47 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "x30", "memory"); return res; } +#elif SANITIZER_LOONGARCH64 +uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr) { + if (!fn || !child_stack) + return -EINVAL; + + CHECK_EQ(0, (uptr)child_stack % 16); + + register int res __asm__("$a0"); + register int __flags __asm__("$a0") = flags; + register void *__stack __asm__("$a1") = child_stack; + register int *__ptid __asm__("$a2") = parent_tidptr; + register int *__ctid __asm__("$a3") = child_tidptr; + register void *__tls __asm__("$a4") = newtls; + register int (*__fn)(void *) __asm__("$a5") = fn; + register void *__arg __asm__("$a6") = arg; + register int nr_clone __asm__("$a7") = __NR_clone; + + __asm__ __volatile__( + "syscall 0\n" + + // if ($a0 != 0) + // return $a0; + "bnez $a0, 1f\n" + + // In the child, now. Call "fn(arg)". + "move $a0, $a6\n" + "jirl $ra, $a5, 0\n" + + // Call _exit($a0). 
+ "addi.d $a7, $zero, %9\n" + "syscall 0\n" + + "1:\n" + + : "=r"(res) + : "0"(__flags), "r"(__stack), "r"(__ptid), "r"(__ctid), "r"(__tls), + "r"(__fn), "r"(__arg), "r"(nr_clone), "i"(__NR_exit) + : "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8"); + return res; +} #elif defined(__powerpc64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { @@ -1977,6 +2024,13 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { u64 esr; if (!Aarch64GetESR(ucontext, &esr)) return Unknown; return esr & ESR_ELx_WNR ? Write : Read; +#elif defined(__loongarch__) + u32 flags = ucontext->uc_mcontext.__flags; + if (flags & SC_ADDRERR_RD) + return SignalContext::Read; + if (flags & SC_ADDRERR_WR) + return SignalContext::Write; + return SignalContext::Unknown; #elif defined(__sparc__) // Decode the instruction to determine the access type. // From OpenSolaris $SRC/uts/sun4/os/trap.c (get_accesstype). diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index 1b1b98fe4989d23a3e890ed022b00a11917a7c4b..fe4f9c18e9a22d4470abc7b41eec0efbc0497134 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -77,9 +77,9 @@ uptr internal_arch_prctl(int option, uptr arg2); // internal_sigaction instead. 
int internal_sigaction_norestorer(int signum, const void *act, void *oldact); void internal_sigdelset(__sanitizer_sigset_t *set, int signum); -#if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ - defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64 +# if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ + defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ + defined(__arm__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr); #endif @@ -152,6 +152,9 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { "rdhwr %0,$29\n" \ ".set pop\n" : "=r"(__v)); \ __v; }) +#elif defined (__loongarch__) +# define __get_tls() \ + ({ void** __v; __asm__("move %0, $tp" : "=r"(__v)); __v; }) #elif defined(__riscv) # define __get_tls() \ ({ void** __v; __asm__("mv %0, tp" : "=r"(__v)); __v; }) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 4f330ffa10b0581f8886bc1f6617e19866d38105..f84cdf3cb887640b116102c0f962b03841009bdb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -205,7 +205,8 @@ void InitTlsSize() { g_use_dlpi_tls_data = GetLibcVersion(&major, &minor, &patch) && major == 2 && minor >= 25; -#if defined(__aarch64__) || defined(__x86_64__) || defined(__powerpc64__) +#if defined(__aarch64__) || defined(__x86_64__) || defined(__powerpc64__) || \ + defined(__loongarch__) void *get_tls_static_info = dlsym(RTLD_NEXT, "_dl_get_tls_static_info"); size_t tls_align; ((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align); @@ -265,6 +266,8 @@ static uptr ThreadDescriptorSizeFallback() { #elif defined(__mips__) // 
TODO(sagarthakur): add more values as per different glibc versions. val = FIRST_32_SECOND_64(1152, 1776); +#elif SANITIZER_LOONGARCH64 + val = 1856; // from glibc 2.36 #elif SANITIZER_RISCV64 int major; int minor; @@ -304,7 +307,8 @@ uptr ThreadDescriptorSize() { return val; } -#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \ + SANITIZER_LOONGARCH64 // TlsPreTcbSize includes size of struct pthread_descr and size of tcb // head structure. It lies before the static tls blocks. static uptr TlsPreTcbSize() { @@ -314,6 +318,8 @@ static uptr TlsPreTcbSize() { const uptr kTcbHead = 88; // sizeof (tcbhead_t) #elif SANITIZER_RISCV64 const uptr kTcbHead = 16; // sizeof (tcbhead_t) +#elif SANITIZER_LOONGARCH64 + const uptr kTcbHead = 16; // sizeof (tcbhead_t) #endif const uptr kTlsAlign = 16; const uptr kTlsPreTcbSize = @@ -500,6 +506,15 @@ static void GetTls(uptr *addr, uptr *size) { *addr = reinterpret_cast(__builtin_thread_pointer()) - ThreadDescriptorSize(); *size = g_tls_size + ThreadDescriptorSize(); +#elif SANITIZER_GLIBC && defined(__loongarch__) +# ifdef __clang__ + *addr = reinterpret_cast(__builtin_thread_pointer()) - + ThreadDescriptorSize(); +# else + asm("or %0,$tp,$zero" : "=r"(*addr)); + *addr -= ThreadDescriptorSize(); +# endif + *size = g_tls_size + ThreadDescriptorSize(); #elif SANITIZER_GLIBC && defined(__powerpc64__) // Workaround for glibc<2.25(?). 2.27 is known to not need this. 
uptr tp; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 81febbdca4347d703e4b91c2d964c421cd2e3008..cd773827c2582ad749baf108245d6b1a84c11669 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -281,7 +281,7 @@ #if SI_LINUX_NOT_ANDROID && !SI_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) + defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64) #define SANITIZER_INTERCEPT_PTRACE 1 #else #define SANITIZER_INTERCEPT_PTRACE 0 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 839d31a8b75ca518fd09124bb8632af7f3eb07f6..e5796b076ad2839e627933326739592db4ea5d7e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -103,7 +103,7 @@ # endif # if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ - defined(__hexagon__) || SANITIZER_RISCV64 + defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; @@ -367,7 +367,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) + defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64) #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = 
sizeof(elf_fpregset_t); @@ -377,21 +377,24 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #elif defined(__aarch64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); +#elif defined(__loongarch__) + unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); + unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fp_state); #elif defined(__s390__) unsigned struct_user_regs_struct_sz = sizeof(struct _user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct _user_fpregs_struct); #else unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); -#endif // __mips64 || __powerpc64__ || __aarch64__ +#endif // __mips64 || __powerpc64__ || __aarch64__ || __loongarch__ #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ defined(__aarch64__) || defined(__arm__) || defined(__s390__) || \ - SANITIZER_RISCV64 + defined(__loongarch__) || SANITIZER_RISCV64 unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ -// || __s390__ +// || __s390__ || __loongarch__ #ifdef __arm__ unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; #else diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index a13117a330ec668c4965c2bf5504cbd27f152e8b..ecd45138026cea1d175f79fdc6f7706d8657a10f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -848,7 +848,7 @@ typedef void __sanitizer_FILE; #if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ 
defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) + defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 556fdfaaa89e9e9c609f8cc0cbf44ea23debceca..8618cbc48eadad1ab97cddf382d27b94c09851f2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -16,7 +16,7 @@ #if SANITIZER_LINUX && \ (defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64) + defined(__arm__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) #include "sanitizer_stoptheworld.h" @@ -31,7 +31,8 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if (defined(__aarch64__) || SANITIZER_RISCV64) && !SANITIZER_ANDROID +#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ + !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h #if SANITIZER_OHOS // Do not include asm/sigcontext.h on behalf of asm/ptrace.h @@ -524,6 +525,12 @@ typedef struct user_pt_regs regs_struct; static constexpr uptr kExtraRegs[] = {0}; #define ARCH_IOVEC_FOR_GETREGSET +#elif defined(__loongarch__) +typedef struct user_pt_regs regs_struct; +#define REG_SP regs[3] +static constexpr uptr kExtraRegs[] = {0}; +#define ARCH_IOVEC_FOR_GETREGSET + #elif SANITIZER_RISCV64 typedef struct user_regs_struct regs_struct; // sys/ucontext.h already defines REG_SP as 2. Undefine it first. 
@@ -631,3 +638,4 @@ PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP( #endif // SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) // || defined(__aarch64__) || defined(__powerpc64__) // || defined(__s390__) || defined(__i386__) || defined(__arm__) + // || SANITIZER_LOONGARCH64 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 461fe96613688beb72ba8de03417e46c13970faa..a6f82ced20367310fc13d8fecb3e1c1e70efe62b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -256,6 +256,8 @@ class LLVMSymbolizerProcess final : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=x86_64"; #elif defined(__i386__) const char* const kSymbolizerArch = "--default-arch=i386"; +#elif SANITIZER_LOONGARCH64 + const char *const kSymbolizerArch = "--default-arch=loongarch64"; #elif SANITIZER_RISCV64 const char *const kSymbolizerArch = "--default-arch=riscv64"; #elif defined(__aarch64__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc index 97ca7f2f3f92bdaeb284884cc52f499a83a4367d..80f5e6be8ad1459a7e6708dc2a02f1b585cfe0c4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc @@ -14,18 +14,22 @@ // About local register variables: // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables // -// Kernel ABI... -// syscall number is passed in a7 -// (http://man7.org/linux/man-pages/man2/syscall.2.html) results are return in -// a0 and a1 (http://man7.org/linux/man-pages/man2/syscall.2.html) arguments -// are passed in: a0-a7 (confirmed by inspecting glibc sources). 
+// Kernel ABI: +// https://lore.kernel.org/loongarch/1f353678-3398-e30b-1c87-6edb278f74db@xen0n.name/T/#m1613bc86c2d7bf5f6da92bd62984302bfd699a2f +// syscall number is placed in a7 +// parameters, if present, are placed in a0-a6 +// upon return: +// the return value is placed in a0 +// t0-t8 should be considered clobbered +// all other registers are preserved #define SYSCALL(name) __NR_##name -#define INTERNAL_SYSCALL_CLOBBERS "memory" +#define INTERNAL_SYSCALL_CLOBBERS \ + "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" static uptr __internal_syscall(u64 nr) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0"); + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0"); __asm__ volatile("syscall 0\n\t" : "=r"(a0) : "r"(a7) @@ -35,8 +39,8 @@ static uptr __internal_syscall(u64 nr) { #define __internal_syscall0(n) (__internal_syscall)(n) static uptr __internal_syscall(u64 nr, u64 arg1) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0") = arg1; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7) @@ -46,9 +50,9 @@ static uptr __internal_syscall(u64 nr, u64 arg1) { #define __internal_syscall1(n, a1) (__internal_syscall)(n, (u64)(a1)) static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1) @@ -59,10 +63,10 @@ static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) { (__internal_syscall)(n, (u64)(a1), (long)(a2)) static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; - register u64 a2 asm("a2") = arg3; + register u64 a7 asm("$a7") = nr; + 
register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; + register u64 a2 asm("$a2") = arg3; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2) @@ -74,11 +78,11 @@ static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) { static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; - register u64 a2 asm("a2") = arg3; - register u64 a3 asm("a3") = arg4; + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; + register u64 a2 asm("$a2") = arg3; + register u64 a3 asm("$a3") = arg4; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2), "r"(a3) @@ -90,12 +94,12 @@ static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, long arg5) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; - register u64 a2 asm("a2") = arg3; - register u64 a3 asm("a3") = arg4; - register u64 a4 asm("a4") = arg5; + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; + register u64 a2 asm("$a2") = arg3; + register u64 a3 asm("$a3") = arg4; + register u64 a4 asm("$a4") = arg5; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4) @@ -108,13 +112,13 @@ static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, long arg5, long arg6) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; - register u64 a2 asm("a2") = arg3; - register u64 a3 asm("a3") = arg4; - register u64 a4 asm("a4") = arg5; - register u64 a5 asm("a5") = arg6; + register u64 a7 asm("$a7") = 
nr; + register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; + register u64 a2 asm("$a2") = arg3; + register u64 a3 asm("$a3") = arg4; + register u64 a4 asm("$a4") = arg5; + register u64 a5 asm("$a5") = arg6; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5) @@ -127,14 +131,14 @@ static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4, long arg5, long arg6, long arg7) { - register u64 a7 asm("a7") = nr; - register u64 a0 asm("a0") = arg1; - register u64 a1 asm("a1") = arg2; - register u64 a2 asm("a2") = arg3; - register u64 a3 asm("a3") = arg4; - register u64 a4 asm("a4") = arg5; - register u64 a5 asm("a5") = arg6; - register u64 a6 asm("a6") = arg7; + register u64 a7 asm("$a7") = nr; + register u64 a0 asm("$a0") = arg1; + register u64 a1 asm("$a1") = arg2; + register u64 a2 asm("$a2") = arg3; + register u64 a3 asm("$a3") = arg4; + register u64 a4 asm("$a4") = arg5; + register u64 a5 asm("$a5") = arg6; + register u64 a6 asm("$a6") = arg7; __asm__ volatile("syscall 0\n\t" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp index e8d590a503a9abdf4687a071fca03a16a335acb9..a9dd0669ccb968f580d4e21842e11fe0c8999e0e 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp @@ -44,7 +44,7 @@ class FastUnwindTest : public ::testing::Test { uhwptr fake_bottom; BufferedStackTrace trace; -#if defined(__riscv) +#if defined(__loongarch__) || defined(__riscv) const uptr kFpOffset = 4; const uptr kBpOffset = 2; #else diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index 
af3c9a2de500458cfa37c4c7ced45703bcf28262..057e53a00d91329014871059ee42d9e7fca072a4 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -219,6 +219,10 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go COMMENT "Checking TSan Go runtime..." VERBATIM) + elseif(arch MATCHES "loongarch64") + add_asm_sources(TSAN_ASM_SOURCES + tsan_rtl_loongarch64.S + ) elseif(arch MATCHES "mips64|mips64le") add_asm_sources(TSAN_ASM_SOURCES tsan_rtl_mips64.S diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 6b0e255f6db302e1c44fed6e18428f10f0e0ef15..550a1889b141dd5e67073aca7cc5726b6ae17880 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -76,6 +76,8 @@ struct ucontext_t { #define PTHREAD_ABI_BASE "GLIBC_2.3.2" #elif defined(__aarch64__) || SANITIZER_PPC64V2 #define PTHREAD_ABI_BASE "GLIBC_2.17" +#elif SANITIZER_LOONGARCH64 +#define PTHREAD_ABI_BASE "GLIBC_2.36" #endif extern "C" int pthread_attr_init(void *attr); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 7c13c7335136b437a8419c013203e4d945147322..cad99f1a0bd2635b121307e05ebc04eb02c0f38f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -229,6 +229,38 @@ struct MappingAarch64_48 { static const uptr kVdsoBeg = 0xffff000000000ull; }; +/* C/C++ on linux/loongarch64 (47-bit VMA) +0000 0000 4000 - 0080 0000 0000: main binary +0080 0000 0000 - 0100 0000 0000: - +0100 0000 0000 - 1000 0000 0000: shadow memory +1000 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3400 0000 0000: metainfo +3400 0000 0000 - 5555 0000 0000: - +5555 0000 0000 - 5556 0000 0000: main binary (PIE) +5556 0000 0000 - 7ffe 0000 0000: - +7ffe 0000 0000 - 7fff 0000 0000: heap +7fff 0000 0000 - 7fff 8000 0000: - +7fff 8000 0000 - 8000 0000 0000: 
modules and main thread stack +*/ +struct MappingLoongArch64_47 { + static const uptr kMetaShadowBeg = 0x300000000000ull; + static const uptr kMetaShadowEnd = 0x340000000000ull; + static const uptr kShadowBeg = 0x010000000000ull; + static const uptr kShadowEnd = 0x100000000000ull; + static const uptr kHeapMemBeg = 0x7ffe00000000ull; + static const uptr kHeapMemEnd = 0x7fff00000000ull; + static const uptr kLoAppMemBeg = 0x000000004000ull; + static const uptr kLoAppMemEnd = 0x008000000000ull; + static const uptr kMidAppMemBeg = 0x555500000000ull; + static const uptr kMidAppMemEnd = 0x555600000000ull; + static const uptr kHiAppMemBeg = 0x7fff80000000ull; + static const uptr kHiAppMemEnd = 0x800000000000ull; + static const uptr kShadowMsk = 0x780000000000ull; + static const uptr kShadowXor = 0x040000000000ull; + static const uptr kShadowAdd = 0x000000000000ull; + static const uptr kVdsoBeg = 0x7fffffffc000ull; +}; + /* C/C++ on linux/powerpc64 (44-bit VMA) 0000 0000 0100 - 0001 0000 0000: main binary @@ -599,6 +631,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { case 48: return Func::template Apply(arg); } +# elif SANITIZER_LOONGARCH64 + return Func::template Apply(arg); # elif defined(__powerpc64__) switch (vmaSize) { case 44: @@ -627,6 +661,7 @@ void ForEachMapping() { Func::template Apply(); Func::template Apply(); Func::template Apply(); + Func::template Apply(); Func::template Apply(); Func::template Apply(); Func::template Apply(); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 807f6be2eee378b82644c8b64965ffb76ec1797f..93276fcf52121e8caa3dbc4c9f0575f6251f842c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -66,7 +66,8 @@ extern "C" void *__libc_stack_end; void *__libc_stack_end = 0; #endif -#if SANITIZER_LINUX && defined(__aarch64__) && !SANITIZER_GO +#if SANITIZER_LINUX && (defined(__aarch64__) || 
defined(__loongarch_lp64)) && \ + !SANITIZER_GO # define INIT_LONGJMP_XOR_KEY 1 #else # define INIT_LONGJMP_XOR_KEY 0 @@ -230,6 +231,14 @@ void InitializePlatformEarly() { Die(); } #endif +#elif SANITIZER_LOONGARCH64 +# if !SANITIZER_GO + if (vmaSize != 47) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %zd - Supported 47\n", vmaSize); + Die(); + } +# endif #elif defined(__powerpc64__) # if !SANITIZER_GO if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) { @@ -303,6 +312,9 @@ void InitializePlatform() { CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1); reexec = true; } + +#endif +#if SANITIZER_LINUX && defined(__loongarch_lp64) // Initialize the xor key used in {sig}{set,long}jump. InitializeLongjmpXorKey(); #endif @@ -375,6 +387,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # else return mangled_sp; # endif +#elif defined(__loongarch_lp64) + return mangled_sp ^ longjmp_xor_key; #elif defined(__powerpc64__) // Reverse of: // ld r4, -28696(r13) @@ -410,6 +424,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { #elif SANITIZER_LINUX # ifdef __aarch64__ # define LONG_JMP_SP_ENV_SLOT 13 +# elif defined(__loongarch__) +# define LONG_JMP_SP_ENV_SLOT 1 # elif defined(__mips64) # define LONG_JMP_SP_ENV_SLOT 1 # elif defined(__s390x__) @@ -436,7 +452,11 @@ static void InitializeLongjmpXorKey() { // 2. Retrieve vanilla/mangled SP. uptr sp; +#ifdef __loongarch__ + asm("move %0, $sp" : "=r" (sp)); +#else asm("mov %0, sp" : "=r" (sp)); +#endif uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT]; // 3. xor SPs to obtain key. 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index e1e121e2ee073dc711aa79d056d79daedaba2764..8b8e4bcd7b5ce6a52ca51246604eef2a17322705 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -56,7 +56,8 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; -#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) +#if defined(__mips64) || defined(__aarch64__) || defined(__loongarch__) || \ + defined(__powerpc__) struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S new file mode 100644 index 0000000000000000000000000000000000000000..12856bd110cd4d414b9c20668b5dd9627199aff1 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S @@ -0,0 +1,196 @@ +#include "sanitizer_common/sanitizer_asm.h" + +.section .text + +ASM_HIDDEN(__tsan_setjmp) +.comm _ZN14__interception11real_setjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(setjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp)) +ASM_SYMBOL_INTERCEPTOR(setjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi.d $sp, $sp, -32 + st.d $ra, $sp, 24 + st.d $fp, $sp, 16 + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (22, -16) + + // Adjust the SP for previous frame + addi.d $fp, $sp, 32 + CFI_DEF_CFA_REGISTER (22) + + // Save env parameter + st.d $a0, $sp, 8 + CFI_OFFSET (4, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $a0, $fp, 0 + + // call tsan interceptor + bl ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld.d $a0, $sp, 8 + CFI_RESTORE (4) + + // Restore frame/link register + ld.d $fp, $sp, 16 + ld.d $ra, $sp, 24 + addi.d $sp, $sp, 32 + CFI_RESTORE (22) + CFI_RESTORE (1) + CFI_DEF_CFA (3, 0) + + // tail jump to libc setjmp + la.local $a1, _ZN14__interception11real_setjmpE + ld.d $a1, $a1, 0 + jr $a1 + + CFI_ENDPROC 
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp)) + +.comm _ZN14__interception12real__setjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(_setjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp)) +ASM_SYMBOL_INTERCEPTOR(_setjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi.d $sp, $sp, -32 + st.d $ra, $sp, 24 + st.d $fp, $sp, 16 + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (22, -16) + + // Adjust the SP for previous frame + addi.d $fp, $sp, 32 + CFI_DEF_CFA_REGISTER (22) + + // Save env parameter + st.d $a0, $sp, 8 + CFI_OFFSET (4, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $a0, $fp, 0 + + // call tsan interceptor + bl ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld.d $a0, $sp, 8 + CFI_RESTORE (4) + + // Restore frame/link register + ld.d $fp, $sp, 16 + ld.d $ra, $sp, 24 + addi.d $sp, $sp, 32 + CFI_RESTORE (22) + CFI_RESTORE (1) + CFI_DEF_CFA (3, 0) + + // tail jump to libc setjmp + la.local $a1, _ZN14__interception12real__setjmpE + ld.d $a1, $a1, 0 + jr $a1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp)) + +.comm _ZN14__interception14real_sigsetjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) +ASM_SYMBOL_INTERCEPTOR(sigsetjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi.d $sp, $sp, -32 + st.d $ra, $sp, 24 + st.d $fp, $sp, 16 + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (22, -16) + + // Adjust the SP for previous frame + addi.d $fp, $sp, 32 + CFI_DEF_CFA_REGISTER (22) + + // Save env parameter + st.d $a0, $sp, 8 + CFI_OFFSET (4, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $a0, $fp, 0 + + // call tsan interceptor + bl ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld.d $a0, $sp, 8 + CFI_RESTORE (4) + + // Restore frame/link register + ld.d $fp, $sp, 16 + ld.d $ra, $sp, 24 + addi.d $sp, $sp, 32 + CFI_RESTORE (22) + CFI_RESTORE 
(1) + CFI_DEF_CFA (3, 0) + + // tail jump to libc setjmp + la.local $a1, _ZN14__interception14real_sigsetjmpE + ld.d $a1, $a1, 0 + jr $a1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) + +.comm _ZN14__interception16real___sigsetjmpE,8,8 +.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp) +ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) +ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): + CFI_STARTPROC + + // Save frame pointer and return address register + addi.d $sp, $sp, -32 + st.d $ra, $sp, 24 + st.d $fp, $sp, 16 + CFI_DEF_CFA_OFFSET (32) + CFI_OFFSET (1, -8) + CFI_OFFSET (22, -16) + + // Adjust the SP for previous frame + addi.d $fp, $sp, 32 + CFI_DEF_CFA_REGISTER (22) + + // Save env parameter + st.d $a0, $sp, 8 + CFI_OFFSET (4, -24) + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $a0, $fp, 0 + + // call tsan interceptor + bl ASM_SYMBOL(__tsan_setjmp) + + // Restore env parameter + ld.d $a0, $sp, 8 + CFI_RESTORE (4) + + // Restore frame/link register + ld.d $fp, $sp, 16 + ld.d $ra, $sp, 24 + addi.d $sp, $sp, 32 + CFI_RESTORE (22) + CFI_RESTORE (1) + CFI_DEF_CFA (3, 0) + + // tail jump to libc setjmp + la.local $a1, _ZN14__interception16real___sigsetjmpE + ld.d $a1, $a1, 0 + jr $a1 + + CFI_ENDPROC +ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) diff --git a/compiler-rt/test/asan/CMakeLists.txt b/compiler-rt/test/asan/CMakeLists.txt index 91f4bc675d79dcdad82ec53e0355edb80d5c0ba2..536f2f4f1cc5c5162b5f3364531805a49a9d436e 100644 --- a/compiler-rt/test/asan/CMakeLists.txt +++ b/compiler-rt/test/asan/CMakeLists.txt @@ -14,7 +14,7 @@ if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND endif() macro(get_bits_for_arch arch bits) - if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9|riscv64") + if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9|riscv64|loongarch64") set(${bits} 64) elseif (${arch} MATCHES "i386|arm|mips|mipsel|sparc") set(${bits} 
32) diff --git a/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp b/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp index 2a2010f7ab0fe3685c0ca8219341f2418a5d8485..016774a32fee75cd322843b4486cd5f215eedc16 100644 --- a/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp @@ -1,18 +1,22 @@ // Test that SIGSEGV during leak checking does not crash the process. // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s // REQUIRES: leak-detection -#include +#include #include +#include #include -#include +#include char data[10 * 1024 * 1024]; int main() { + long pagesize_mask = sysconf(_SC_PAGESIZE) - 1; void *p = malloc(10 * 1024 * 1024); // surprise-surprise! - mprotect((void*)(((unsigned long)p + 4095) & ~4095), 16 * 1024, PROT_NONE); - mprotect((void*)(((unsigned long)data + 4095) & ~4095), 16 * 1024, PROT_NONE); + mprotect((void *)(((unsigned long)p + pagesize_mask) & ~pagesize_mask), + 16 * 1024, PROT_NONE); + mprotect((void *)(((unsigned long)data + pagesize_mask) & ~pagesize_mask), + 16 * 1024, PROT_NONE); __lsan_do_leak_check(); fprintf(stderr, "DONE\n"); } diff --git a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp index 21743cfdd9aff6212fc0015b000feb7868538755..cae1493fd487cfa9cc34a9d8a31485b724ac8195 100644 --- a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp @@ -14,8 +14,8 @@ #include #include // for iovec #include // for NT_PRSTATUS -#ifdef __aarch64__ -# include +#if defined(__aarch64__) || defined(__loongarch__) +# include #endif #if defined(__i386__) || defined(__x86_64__) @@ -37,6 +37,13 @@ typedef struct user_fpsimd_state fpregs_struct; #define PRINT_REG_FP(__fpregs) printf ("%x\n", (unsigned) (__fpregs.fpsr)) #define ARCH_IOVEC_FOR_GETREGSET +#elif defined(__loongarch__) +typedef struct user_pt_regs regs_struct; +typedef struct user_fp_state 
fpregs_struct; +# define PRINT_REG_PC(__regs) printf("%lx\n", (unsigned long)(__regs.csr_era)) +# define PRINT_REG_FP(__fpregs) printf("%x\n", (unsigned)(__fpregs.fcsr)) +# define ARCH_IOVEC_FOR_GETREGSET + #elif defined(__powerpc64__) typedef struct pt_regs regs_struct; typedef elf_fpregset_t fpregs_struct; diff --git a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp index 4c0f02c5088e47ff12fdd789d9d755567fd755f5..b943e4debce2cc0602e963d050729bf42f45da1c 100644 --- a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp @@ -1,7 +1,7 @@ // https://github.com/google/sanitizers/issues/925 // RUN: %clang_asan -O0 %s -o %t && %run %t 2>&1 -// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch +// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch || loongarch64-target-arch #include #include diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 3fb929e3b16a2d6560fd2dacac1da72da0cee875..38c00420799573d80f9ee3aa72651228c012584f 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -202,7 +202,7 @@ if not config.arm_thumb: # Turn on leak detection on 64-bit Linux. 
leak_detection_android = config.android and 'android-thread-properties-api' in config.available_features and (config.target_arch in ['x86_64', 'i386', 'i686', 'aarch64']) -leak_detection_linux = (config.host_os == 'Linux') and (not config.android) and (config.target_arch in ['x86_64', 'i386', 'riscv64']) +leak_detection_linux = (config.host_os == 'Linux') and (not config.android) and (config.target_arch in ['x86_64', 'i386', 'riscv64', 'loongarch64']) leak_detection_mac = (config.host_os == 'Darwin') and (config.apple_platform == 'osx') leak_detection_netbsd = (config.host_os == 'NetBSD') and (config.target_arch in ['x86_64', 'i386']) if leak_detection_android or leak_detection_linux or leak_detection_mac or leak_detection_netbsd: diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c index 82a802022318346a4d4888a3dab8c5a10956f681..fe2e2c80f655b7a230cbd00b4cd582a2c9892382 100644 --- a/compiler-rt/test/builtins/Unit/addtf3_test.c +++ b/compiler-rt/test/builtins/Unit/addtf3_test.c @@ -66,7 +66,8 @@ int main() return 1; #if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ - defined(i386) || defined(__x86_64__) + defined(i386) || defined(__x86_64__) || (defined(__loongarch__) && \ + __loongarch_frlen != 0) // Rounding mode tests on supported architectures const long double m = 1234.0L, n = 0.01L; diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c index c06a0baba3c3e0e37315b48a229d7bcd58b0987c..377ae95a9a7d7bb2aad696e8cd45674dfae6d8ef 100644 --- a/compiler-rt/test/builtins/Unit/subtf3_test.c +++ b/compiler-rt/test/builtins/Unit/subtf3_test.c @@ -59,7 +59,8 @@ int main() return 1; #if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ - defined(i386) || defined(__x86_64__) + defined(i386) || defined(__x86_64__) || (defined(__loongarch__) && \ + __loongarch_frlen != 0) // Rounding mode tests on supported architectures const long 
double m = 1234.02L, n = 0.01L; diff --git a/compiler-rt/test/lsan/TestCases/swapcontext.cpp b/compiler-rt/test/lsan/TestCases/swapcontext.cpp index f78867cc06959f69cbe742a4a1b7d1dcdb2b8e6d..2b2c158f2f2c56b2749e19d9db18453e3c0db6ed 100644 --- a/compiler-rt/test/lsan/TestCases/swapcontext.cpp +++ b/compiler-rt/test/lsan/TestCases/swapcontext.cpp @@ -5,7 +5,7 @@ // RUN: %env_lsan_opts= %run %t 2>&1 // RUN: %env_lsan_opts= not %run %t foo 2>&1 | FileCheck %s // Missing 'getcontext' and 'makecontext' on Android. -// UNSUPPORTED: arm,aarch64,powerpc64,android +// UNSUPPORTED: arm,aarch64,loongarch64,powerpc64,android #include "sanitizer_common/sanitizer_ucontext.h" #include diff --git a/compiler-rt/test/lsan/TestCases/use_registers.cpp b/compiler-rt/test/lsan/TestCases/use_registers.cpp index d7852d4e061375ce898e2bf670aec7eddb4b6de3..ebea5f0a15d1b125bcb7b8e4c6b262c7f16920a1 100644 --- a/compiler-rt/test/lsan/TestCases/use_registers.cpp +++ b/compiler-rt/test/lsan/TestCases/use_registers.cpp @@ -43,6 +43,8 @@ extern "C" void *registers_thread_func(void *arg) { "mov x14, %0" : : "r"(p)); +#elif defined(__loongarch_lp64) + asm("move $s8, %0" : : "r"(p)); #elif defined(__powerpc__) asm("mr 30, %0" : diff --git a/compiler-rt/test/lsan/lit.common.cfg.py b/compiler-rt/test/lsan/lit.common.cfg.py index 88c557549b38b6238c612d7dd015ca2ba2fea20d..0abd7e83157ce7c6ecc3f5fe192fc424ac04a69f 100644 --- a/compiler-rt/test/lsan/lit.common.cfg.py +++ b/compiler-rt/test/lsan/lit.common.cfg.py @@ -74,9 +74,9 @@ config.substitutions.append( ("%clang_lsan ", build_invocation(clang_lsan_cflags config.substitutions.append( ("%clangxx_lsan ", build_invocation(clang_lsan_cxxflags)) ) # LeakSanitizer tests are currently supported on -# Android{aarch64, x86, x86_64}, x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, s390x Linux and x86_64 Darwin. +# Android{aarch64, x86, x86_64}, x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, s390x Linux, loongarch64 Linux and x86_64 Darwin. 
supported_android = config.android and config.target_arch in ['x86_64', 'i386', 'aarch64'] and 'android-thread-properties-api' in config.available_features -supported_linux = (not config.android) and config.host_os == 'Linux' and config.host_arch in ['aarch64', 'x86_64', 'ppc64', 'ppc64le', 'mips64', 'riscv64', 'arm', 'armhf', 'armv7l', 's390x'] +supported_linux = (not config.android) and config.host_os == 'Linux' and config.host_arch in ['aarch64', 'x86_64', 'ppc64', 'ppc64le', 'mips64', 'riscv64', 'arm', 'armhf', 'armv7l', 's390x', 'loongarch64'] supported_darwin = config.host_os == 'Darwin' and config.target_arch in ['x86_64'] supported_netbsd = config.host_os == 'NetBSD' and config.target_arch in ['x86_64', 'i386'] if not (supported_android or supported_linux or supported_darwin or supported_netbsd): diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp index a2d32439e500388d038750c41b448e0187bfec18..c43b13a0b3be1ddeeaaf467108dd4d282abca410 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp @@ -17,7 +17,7 @@ #include #include #endif -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(__loongarch__) // GLIBC 2.20+ sys/user does not include asm/ptrace.h #include #endif @@ -100,6 +100,28 @@ int main(void) { printf("%x\n", fpregs.fpsr); #endif // (__aarch64__) +#if (__loongarch__) + struct iovec regset_io; + + struct user_pt_regs regs; + regset_io.iov_base = ®s; + regset_io.iov_len = sizeof(regs); + res = + ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, (void *)®set_io); + assert(!res); + if (regs.csr_era) + printf("%lx\n", regs.csr_era); + + struct user_fp_state fpregs; + regset_io.iov_base = &fpregs; + regset_io.iov_len = sizeof(fpregs); + res = + ptrace(PTRACE_GETREGSET, pid, (void *)NT_FPREGSET, (void *)®set_io); + assert(!res); + if (fpregs.fcsr) + printf("%x\n", fpregs.fcsr); 
+#endif // (__loongarch__) + #if (__s390__) struct iovec regset_io; diff --git a/compiler-rt/test/sanitizer_common/print_address.h b/compiler-rt/test/sanitizer_common/print_address.h index 49b960ebbb2ae2760a739c7929604930666b3f40..1128c928b0534231c426a22264c43643f934bc45 100644 --- a/compiler-rt/test/sanitizer_common/print_address.h +++ b/compiler-rt/test/sanitizer_common/print_address.h @@ -7,8 +7,9 @@ void print_address(const char *str, int n, ...) { va_start(ap, n); while (n--) { void *p = va_arg(ap, void *); -#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__s390x__) || (defined(__riscv) && __riscv_xlen == 64) +#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ + defined(__s390x__) || (defined(__riscv) && __riscv_xlen == 64) || \ + defined(__loongarch_lp64) // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not // match to the format used in the diagnotic message. fprintf(stderr, "0x%012lx ", (unsigned long) p); diff --git a/compiler-rt/test/tsan/map32bit.cpp b/compiler-rt/test/tsan/map32bit.cpp index 0f8236292be7a90ac03ded0c9e6e8cdef6fda132..614b270073386cf3d53ec6a8b3c40420464af259 100644 --- a/compiler-rt/test/tsan/map32bit.cpp +++ b/compiler-rt/test/tsan/map32bit.cpp @@ -12,6 +12,7 @@ // XFAIL: aarch64 // XFAIL: powerpc64 // XFAIL: s390x +// XFAIL: loongarch64 // MAP_32BIT doesn't exist on OS X and NetBSD. 
// UNSUPPORTED: darwin,netbsd diff --git a/compiler-rt/test/tsan/mmap_large.cpp b/compiler-rt/test/tsan/mmap_large.cpp index 1d4c73252832a1f7b75e81a93fe03ae08c4ed0c4..85ebe7f76b02364df77f3765802e4e4bd238dc32 100644 --- a/compiler-rt/test/tsan/mmap_large.cpp +++ b/compiler-rt/test/tsan/mmap_large.cpp @@ -17,7 +17,7 @@ int main() { #ifdef __x86_64__ const size_t kLog2Size = 39; -#elif defined(__mips64) || defined(__aarch64__) +#elif defined(__mips64) || defined(__aarch64__) || defined(__loongarch_lp64) const size_t kLog2Size = 32; #elif defined(__powerpc64__) const size_t kLog2Size = 39; diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index e3f0c6dfecdc29d03de7ac04a5798d9554fcf8f0..0b22fdbe3be6f2d1d59b658d40ce04fd8f6cda8a 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) || defined(__riscv) + defined(__wasm__) || defined(__riscv) || defined(__loongarch__) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h index 5e9de90f649fd5d1ea78c7f6b27778b8986f032e..f69fe89e9a265c88f8d54eb6902b645942eaaaa9 100644 --- a/libunwind/include/__libunwind_config.h +++ b/libunwind/include/__libunwind_config.h @@ -30,6 +30,7 @@ #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV 64 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_VE 143 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X 83 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH 64 #if defined(_LIBUNWIND_IS_NATIVE_ONLY) # if defined(__linux__) @@ -166,6 +167,16 @@ # define _LIBUNWIND_CONTEXT_SIZE 34 # define _LIBUNWIND_CURSOR_SIZE 46 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 
_LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X +#elif defined(__loongarch__) +#define _LIBUNWIND_TARGET_LOONGARCH 1 +#if __loongarch_grlen == 64 +#define _LIBUNWIND_CONTEXT_SIZE 65 +#define _LIBUNWIND_CURSOR_SIZE 77 +#else +#error "Unsupported LoongArch ABI" +#endif +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER \ + _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH # else # error "Unsupported architecture." # endif @@ -185,6 +196,7 @@ # define _LIBUNWIND_TARGET_RISCV 1 # define _LIBUNWIND_TARGET_VE 1 # define _LIBUNWIND_TARGET_S390X 1 +#define _LIBUNWIND_TARGET_LOONGARCH 1 # define _LIBUNWIND_CONTEXT_SIZE 167 # define _LIBUNWIND_CURSOR_SIZE 179 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h index b01348d8c92937eb295cc2bef1cbb830464b2125..3393221dced6b3a0c52692dbb55c150ab107fbee 100644 --- a/libunwind/include/libunwind.h +++ b/libunwind/include/libunwind.h @@ -1231,4 +1231,72 @@ enum { // 68-83 Vector Registers %v16-%v31 }; +// LoongArch registers. 
+enum { + UNW_LOONGARCH_R0 = 0, + UNW_LOONGARCH_R1 = 1, + UNW_LOONGARCH_R2 = 2, + UNW_LOONGARCH_R3 = 3, + UNW_LOONGARCH_R4 = 4, + UNW_LOONGARCH_R5 = 5, + UNW_LOONGARCH_R6 = 6, + UNW_LOONGARCH_R7 = 7, + UNW_LOONGARCH_R8 = 8, + UNW_LOONGARCH_R9 = 9, + UNW_LOONGARCH_R10 = 10, + UNW_LOONGARCH_R11 = 11, + UNW_LOONGARCH_R12 = 12, + UNW_LOONGARCH_R13 = 13, + UNW_LOONGARCH_R14 = 14, + UNW_LOONGARCH_R15 = 15, + UNW_LOONGARCH_R16 = 16, + UNW_LOONGARCH_R17 = 17, + UNW_LOONGARCH_R18 = 18, + UNW_LOONGARCH_R19 = 19, + UNW_LOONGARCH_R20 = 20, + UNW_LOONGARCH_R21 = 21, + UNW_LOONGARCH_R22 = 22, + UNW_LOONGARCH_R23 = 23, + UNW_LOONGARCH_R24 = 24, + UNW_LOONGARCH_R25 = 25, + UNW_LOONGARCH_R26 = 26, + UNW_LOONGARCH_R27 = 27, + UNW_LOONGARCH_R28 = 28, + UNW_LOONGARCH_R29 = 29, + UNW_LOONGARCH_R30 = 30, + UNW_LOONGARCH_R31 = 31, + UNW_LOONGARCH_F0 = 32, + UNW_LOONGARCH_F1 = 33, + UNW_LOONGARCH_F2 = 34, + UNW_LOONGARCH_F3 = 35, + UNW_LOONGARCH_F4 = 36, + UNW_LOONGARCH_F5 = 37, + UNW_LOONGARCH_F6 = 38, + UNW_LOONGARCH_F7 = 39, + UNW_LOONGARCH_F8 = 40, + UNW_LOONGARCH_F9 = 41, + UNW_LOONGARCH_F10 = 42, + UNW_LOONGARCH_F11 = 43, + UNW_LOONGARCH_F12 = 44, + UNW_LOONGARCH_F13 = 45, + UNW_LOONGARCH_F14 = 46, + UNW_LOONGARCH_F15 = 47, + UNW_LOONGARCH_F16 = 48, + UNW_LOONGARCH_F17 = 49, + UNW_LOONGARCH_F18 = 50, + UNW_LOONGARCH_F19 = 51, + UNW_LOONGARCH_F20 = 52, + UNW_LOONGARCH_F21 = 53, + UNW_LOONGARCH_F22 = 54, + UNW_LOONGARCH_F23 = 55, + UNW_LOONGARCH_F24 = 56, + UNW_LOONGARCH_F25 = 57, + UNW_LOONGARCH_F26 = 58, + UNW_LOONGARCH_F27 = 59, + UNW_LOONGARCH_F28 = 60, + UNW_LOONGARCH_F29 = 61, + UNW_LOONGARCH_F30 = 62, + UNW_LOONGARCH_F31 = 63, +}; + #endif diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index ac9d8f9086022e25af58648db79ec4cc64e28bae..fd3de946a90082f663d8a71f092692cb6790ec37 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -40,6 +40,7 @@ enum { REGISTERS_RISCV, REGISTERS_VE, REGISTERS_S390X, + REGISTERS_LOONGARCH, }; #if 
defined(_LIBUNWIND_TARGET_I386) @@ -5123,6 +5124,271 @@ inline const char *Registers_s390x::getRegisterName(int regNum) { } #endif // _LIBUNWIND_TARGET_S390X +#if defined(_LIBUNWIND_TARGET_LOONGARCH) +/// Registers_loongarch holds the register state of a thread in a 64-bit +/// LoongArch process. +class _LIBUNWIND_HIDDEN Registers_loongarch { +public: + Registers_loongarch(); + Registers_loongarch(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int num) const; + double getFloatRegister(int num) const; + void setFloatRegister(int num, double value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static constexpr int lastDwarfRegNum() { + return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; + } + static int getArch() { return REGISTERS_LOONGARCH; } + + uint64_t getSP() const { return _registers.__r[3]; } + void setSP(uint64_t value) { _registers.__r[3] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value) { _registers.__pc = value; } + +private: + struct loongarch_thread_state_t { + uint64_t __r[32]; + uint64_t __pc; + }; + + loongarch_thread_state_t _registers; +#if __loongarch_frlen == 64 + double _floats[32]; +#endif +}; + +inline Registers_loongarch::Registers_loongarch(const void *registers) { + static_assert((check_fit::does_fit), + "loongarch registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + static_assert(sizeof(_registers) == 0x108, + "expected float registers to be at offset 264"); +#if __loongarch_frlen == 64 + memcpy(_floats, static_cast(registers) + sizeof(_registers), + sizeof(_floats)); +#endif +} + +inline Registers_loongarch::Registers_loongarch() { + memset(&_registers, 0, sizeof(_registers)); 
+#if __loongarch_frlen == 64 + memset(&_floats, 0, sizeof(_floats)); +#endif +} + +inline bool Registers_loongarch::validRegister(int regNum) const { + if (regNum == UNW_REG_IP || regNum == UNW_REG_SP) + return true; + if (regNum < 0 || regNum > UNW_LOONGARCH_F31) + return false; + return true; +} + +inline uint64_t Registers_loongarch::getRegister(int regNum) const { + if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) + return _registers.__r[regNum - UNW_LOONGARCH_R0]; + + if (regNum == UNW_REG_IP) + return _registers.__pc; + if (regNum == UNW_REG_SP) + return _registers.__r[3]; + _LIBUNWIND_ABORT("unsupported loongarch register"); +} + +inline void Registers_loongarch::setRegister(int regNum, uint64_t value) { + if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) + _registers.__r[regNum - UNW_LOONGARCH_R0] = value; + else if (regNum == UNW_REG_IP) + _registers.__pc = value; + else if (regNum == UNW_REG_SP) + _registers.__r[3] = value; + else + _LIBUNWIND_ABORT("unsupported loongarch register"); +} + +inline const char *Registers_loongarch::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "$pc"; + case UNW_REG_SP: + return "$sp"; + case UNW_LOONGARCH_R0: + return "$r0"; + case UNW_LOONGARCH_R1: + return "$r1"; + case UNW_LOONGARCH_R2: + return "$r2"; + case UNW_LOONGARCH_R3: + return "$r3"; + case UNW_LOONGARCH_R4: + return "$r4"; + case UNW_LOONGARCH_R5: + return "$r5"; + case UNW_LOONGARCH_R6: + return "$r6"; + case UNW_LOONGARCH_R7: + return "$r7"; + case UNW_LOONGARCH_R8: + return "$r8"; + case UNW_LOONGARCH_R9: + return "$r9"; + case UNW_LOONGARCH_R10: + return "$r10"; + case UNW_LOONGARCH_R11: + return "$r11"; + case UNW_LOONGARCH_R12: + return "$r12"; + case UNW_LOONGARCH_R13: + return "$r13"; + case UNW_LOONGARCH_R14: + return "$r14"; + case UNW_LOONGARCH_R15: + return "$r15"; + case UNW_LOONGARCH_R16: + return "$r16"; + case UNW_LOONGARCH_R17: + return "$r17"; + case UNW_LOONGARCH_R18: + return "$r18"; 
+ case UNW_LOONGARCH_R19: + return "$r19"; + case UNW_LOONGARCH_R20: + return "$r20"; + case UNW_LOONGARCH_R21: + return "$r21"; + case UNW_LOONGARCH_R22: + return "$r22"; + case UNW_LOONGARCH_R23: + return "$r23"; + case UNW_LOONGARCH_R24: + return "$r24"; + case UNW_LOONGARCH_R25: + return "$r25"; + case UNW_LOONGARCH_R26: + return "$r26"; + case UNW_LOONGARCH_R27: + return "$r27"; + case UNW_LOONGARCH_R28: + return "$r28"; + case UNW_LOONGARCH_R29: + return "$r29"; + case UNW_LOONGARCH_R30: + return "$r30"; + case UNW_LOONGARCH_R31: + return "$r31"; + case UNW_LOONGARCH_F0: + return "$f0"; + case UNW_LOONGARCH_F1: + return "$f1"; + case UNW_LOONGARCH_F2: + return "$f2"; + case UNW_LOONGARCH_F3: + return "$f3"; + case UNW_LOONGARCH_F4: + return "$f4"; + case UNW_LOONGARCH_F5: + return "$f5"; + case UNW_LOONGARCH_F6: + return "$f6"; + case UNW_LOONGARCH_F7: + return "$f7"; + case UNW_LOONGARCH_F8: + return "$f8"; + case UNW_LOONGARCH_F9: + return "$f9"; + case UNW_LOONGARCH_F10: + return "$f10"; + case UNW_LOONGARCH_F11: + return "$f11"; + case UNW_LOONGARCH_F12: + return "$f12"; + case UNW_LOONGARCH_F13: + return "$f13"; + case UNW_LOONGARCH_F14: + return "$f14"; + case UNW_LOONGARCH_F15: + return "$f15"; + case UNW_LOONGARCH_F16: + return "$f16"; + case UNW_LOONGARCH_F17: + return "$f17"; + case UNW_LOONGARCH_F18: + return "$f18"; + case UNW_LOONGARCH_F19: + return "$f19"; + case UNW_LOONGARCH_F20: + return "$f20"; + case UNW_LOONGARCH_F21: + return "$f21"; + case UNW_LOONGARCH_F22: + return "$f22"; + case UNW_LOONGARCH_F23: + return "$f23"; + case UNW_LOONGARCH_F24: + return "$f24"; + case UNW_LOONGARCH_F25: + return "$f25"; + case UNW_LOONGARCH_F26: + return "$f26"; + case UNW_LOONGARCH_F27: + return "$f27"; + case UNW_LOONGARCH_F28: + return "$f28"; + case UNW_LOONGARCH_F29: + return "$f29"; + case UNW_LOONGARCH_F30: + return "$f30"; + case UNW_LOONGARCH_F31: + return "$f31"; + default: + return "unknown register"; + } +} + +inline bool 
Registers_loongarch::validFloatRegister(int regNum) const { + if (regNum < UNW_LOONGARCH_F0 || regNum > UNW_LOONGARCH_F31) + return false; + return true; +} + +inline double Registers_loongarch::getFloatRegister(int regNum) const { +#if __loongarch_frlen == 64 + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_LOONGARCH_F0]; +#else + _LIBUNWIND_ABORT("libunwind not built with float support"); +#endif +} + +inline void Registers_loongarch::setFloatRegister(int regNum, double value) { +#if __loongarch_frlen == 64 + assert(validFloatRegister(regNum)); + _floats[regNum - UNW_LOONGARCH_F0] = value; +#else + _LIBUNWIND_ABORT("libunwind not built with float support"); +#endif +} + +inline bool Registers_loongarch::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_loongarch::getVectorRegister(int) const { + _LIBUNWIND_ABORT("loongarch vector support not implemented"); +} + +inline void Registers_loongarch::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("loongarch vector support not implemented"); +} +#endif //_LIBUNWIND_TARGET_LOONGARCH } // namespace libunwind diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index b8bd9bc59010daba2c39390de1c3403855f025ca..ee535fffb1315b6ed55b89c5d55576e7e2a619ba 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -1066,6 +1066,10 @@ private: } #endif +#if defined(_LIBUNWIND_TARGET_LOONGARCH) + int stepWithCompactEncoding(Registers_loongarch &) { return UNW_EINVAL; } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) int stepWithCompactEncoding(Registers_sparc &) { return UNW_EINVAL; } #endif @@ -1142,6 +1146,12 @@ private: } #endif +#if defined(_LIBUNWIND_TARGET_LOONGARCH) + bool compactSaysUseDwarf(Registers_loongarch &, uint32_t *) const { + return true; + } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) bool compactSaysUseDwarf(Registers_sparc &, uint32_t *) const { return true; } #endif @@ -1226,6 +1236,12 @@ private: } #endif +#if 
defined(_LIBUNWIND_TARGET_LOONGARCH) + compact_unwind_encoding_t dwarfEncoding(Registers_loongarch &) const { + return 0; + } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) compact_unwind_encoding_t dwarfEncoding(Registers_sparc &) const { return 0; } #endif diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 749db2357159d37e5e4015a269803c388b16ffeb..154bd423e86f3b9a0a384e444b7b5c90bc70d370 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -1374,6 +1374,88 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_s390x6jumptoEv) // Return to PSWA (was loaded into %r1 above) br %r1 +#elif defined(__loongarch__) && __loongarch_grlen == 64 + +// +// void libunwind::Registers_loongarch::jumpto() +// +// On entry: +// thread_state pointer is in $a0($r4) +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind19Registers_loongarch6jumptoEv) +# if __loongarch_frlen == 64 + fld.d $f0, $a0, (8 * 33 + 8 * 0) + fld.d $f1, $a0, (8 * 33 + 8 * 1) + fld.d $f2, $a0, (8 * 33 + 8 * 2) + fld.d $f3, $a0, (8 * 33 + 8 * 3) + fld.d $f4, $a0, (8 * 33 + 8 * 4) + fld.d $f5, $a0, (8 * 33 + 8 * 5) + fld.d $f6, $a0, (8 * 33 + 8 * 6) + fld.d $f7, $a0, (8 * 33 + 8 * 7) + fld.d $f8, $a0, (8 * 33 + 8 * 8) + fld.d $f9, $a0, (8 * 33 + 8 * 9) + fld.d $f10, $a0, (8 * 33 + 8 * 10) + fld.d $f11, $a0, (8 * 33 + 8 * 11) + fld.d $f12, $a0, (8 * 33 + 8 * 12) + fld.d $f13, $a0, (8 * 33 + 8 * 13) + fld.d $f14, $a0, (8 * 33 + 8 * 14) + fld.d $f15, $a0, (8 * 33 + 8 * 15) + fld.d $f16, $a0, (8 * 33 + 8 * 16) + fld.d $f17, $a0, (8 * 33 + 8 * 17) + fld.d $f18, $a0, (8 * 33 + 8 * 18) + fld.d $f19, $a0, (8 * 33 + 8 * 19) + fld.d $f20, $a0, (8 * 33 + 8 * 20) + fld.d $f21, $a0, (8 * 33 + 8 * 21) + fld.d $f22, $a0, (8 * 33 + 8 * 22) + fld.d $f23, $a0, (8 * 33 + 8 * 23) + fld.d $f24, $a0, (8 * 33 + 8 * 24) + fld.d $f25, $a0, (8 * 33 + 8 * 25) + fld.d $f26, $a0, (8 * 33 + 8 * 26) + fld.d $f27, $a0, (8 * 33 + 8 * 27) + fld.d 
$f28, $a0, (8 * 33 + 8 * 28) + fld.d $f29, $a0, (8 * 33 + 8 * 29) + fld.d $f30, $a0, (8 * 33 + 8 * 30) + fld.d $f31, $a0, (8 * 33 + 8 * 31) +# endif + + // $r0 is zero + ld.d $r1, $a0, (8 * 1) + ld.d $r2, $a0, (8 * 2) + ld.d $r3, $a0, (8 * 3) + // skip $a0 for now + ld.d $r5, $a0, (8 * 5) + ld.d $r6, $a0, (8 * 6) + ld.d $r7, $a0, (8 * 7) + ld.d $r8, $a0, (8 * 8) + ld.d $r9, $a0, (8 * 9) + ld.d $r10, $a0, (8 * 10) + ld.d $r11, $a0, (8 * 11) + ld.d $r12, $a0, (8 * 12) + ld.d $r13, $a0, (8 * 13) + ld.d $r14, $a0, (8 * 14) + ld.d $r15, $a0, (8 * 15) + ld.d $r16, $a0, (8 * 16) + ld.d $r17, $a0, (8 * 17) + ld.d $r18, $a0, (8 * 18) + ld.d $r19, $a0, (8 * 19) + ld.d $r20, $a0, (8 * 20) + ld.d $r21, $a0, (8 * 21) + ld.d $r22, $a0, (8 * 22) + ld.d $r23, $a0, (8 * 23) + ld.d $r24, $a0, (8 * 24) + ld.d $r25, $a0, (8 * 25) + ld.d $r26, $a0, (8 * 26) + ld.d $r27, $a0, (8 * 27) + ld.d $r28, $a0, (8 * 28) + ld.d $r29, $a0, (8 * 29) + ld.d $r30, $a0, (8 * 30) + ld.d $r31, $a0, (8 * 31) + ld.d $ra, $a0, (8 * 32) // load new pc into $ra + ld.d $a0, $a0, (8 * 4) // restore $a0 last + + jr $ra + #endif #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index c5bfd128cdf60a950c39f74b33183c7bfda6ccd9..8ae2ee1c3b3def40227a563f4f2c54554689bb47 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -1296,6 +1296,86 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) lghi %r2, 0 br %r14 +#elif defined(__loongarch__) && __loongarch_grlen == 64 + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in $a0($r4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + st.d $r1, $a0, (8 * 1) + st.d $r2, $a0, (8 * 2) + st.d $r3, $a0, (8 * 3) + st.d $r4, $a0, (8 * 4) + st.d $r5, $a0, (8 * 5) + st.d $r6, $a0, (8 * 6) + st.d $r7, $a0, (8 * 7) + st.d $r8, $a0, (8 * 8) + st.d $r9, $a0, (8 * 9) + st.d $r10, $a0, (8 * 10) + st.d $r11, $a0, (8 
* 11) + st.d $r12, $a0, (8 * 12) + st.d $r13, $a0, (8 * 13) + st.d $r14, $a0, (8 * 14) + st.d $r15, $a0, (8 * 15) + st.d $r16, $a0, (8 * 16) + st.d $r17, $a0, (8 * 17) + st.d $r18, $a0, (8 * 18) + st.d $r19, $a0, (8 * 19) + st.d $r20, $a0, (8 * 20) + st.d $r21, $a0, (8 * 21) + st.d $r22, $a0, (8 * 22) + st.d $r23, $a0, (8 * 23) + st.d $r24, $a0, (8 * 24) + st.d $r25, $a0, (8 * 25) + st.d $r26, $a0, (8 * 26) + st.d $r27, $a0, (8 * 27) + st.d $r28, $a0, (8 * 28) + st.d $r29, $a0, (8 * 29) + st.d $r30, $a0, (8 * 30) + st.d $r31, $a0, (8 * 31) + st.d $r1, $a0, (8 * 32) // store $ra to pc + +# if __loongarch_frlen == 64 + fst.d $f0, $a0, (8 * 33 + 8 * 0) + fst.d $f1, $a0, (8 * 33 + 8 * 1) + fst.d $f2, $a0, (8 * 33 + 8 * 2) + fst.d $f3, $a0, (8 * 33 + 8 * 3) + fst.d $f4, $a0, (8 * 33 + 8 * 4) + fst.d $f5, $a0, (8 * 33 + 8 * 5) + fst.d $f6, $a0, (8 * 33 + 8 * 6) + fst.d $f7, $a0, (8 * 33 + 8 * 7) + fst.d $f8, $a0, (8 * 33 + 8 * 8) + fst.d $f9, $a0, (8 * 33 + 8 * 9) + fst.d $f10, $a0, (8 * 33 + 8 * 10) + fst.d $f11, $a0, (8 * 33 + 8 * 11) + fst.d $f12, $a0, (8 * 33 + 8 * 12) + fst.d $f13, $a0, (8 * 33 + 8 * 13) + fst.d $f14, $a0, (8 * 33 + 8 * 14) + fst.d $f15, $a0, (8 * 33 + 8 * 15) + fst.d $f16, $a0, (8 * 33 + 8 * 16) + fst.d $f17, $a0, (8 * 33 + 8 * 17) + fst.d $f18, $a0, (8 * 33 + 8 * 18) + fst.d $f19, $a0, (8 * 33 + 8 * 19) + fst.d $f20, $a0, (8 * 33 + 8 * 20) + fst.d $f21, $a0, (8 * 33 + 8 * 21) + fst.d $f22, $a0, (8 * 33 + 8 * 22) + fst.d $f23, $a0, (8 * 33 + 8 * 23) + fst.d $f24, $a0, (8 * 33 + 8 * 24) + fst.d $f25, $a0, (8 * 33 + 8 * 25) + fst.d $f26, $a0, (8 * 33 + 8 * 26) + fst.d $f27, $a0, (8 * 33 + 8 * 27) + fst.d $f28, $a0, (8 * 33 + 8 * 28) + fst.d $f29, $a0, (8 * 33 + 8 * 29) + fst.d $f30, $a0, (8 * 33 + 8 * 30) + fst.d $f31, $a0, (8 * 33 + 8 * 31) +# endif + + move $a0, $zero // UNW_ESUCCESS + jr $ra + #endif WEAK_ALIAS(__unw_getcontext, unw_getcontext) diff --git a/libunwind/src/config.h b/libunwind/src/config.h index 
cc41b817acf6089dfc60f1aa1e39396848f08a73..4bbac951624f9e667d4844c32d6da45964f2f78b 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -115,7 +115,7 @@ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ (!defined(__APPLE__) && defined(__arm__)) || defined(__aarch64__) || \ defined(__mips__) || defined(__riscv) || defined(__hexagon__) || \ - defined(__sparc__) || defined(__s390x__) + defined(__sparc__) || defined(__s390x__) || defined(__loongarch__) #if !defined(_LIBUNWIND_BUILD_SJLJ_APIS) #define _LIBUNWIND_BUILD_ZERO_COST_APIS #endif diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 491ba882891d51509f8775a01db574136087aa8e..e0e98c04aabeeb0e8141dc8c4bde815a520968de 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -77,6 +77,8 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_ve #elif defined(__s390x__) # define REGISTER_KIND Registers_s390x +#elif defined(__loongarch__) && __loongarch_grlen == 64 +#define REGISTER_KIND Registers_loongarch #else # error Architecture not supported #endif diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c23ffafa58f40876b374f9c467ad7961e3f4fa78 --- /dev/null +++ b/lld/ELF/Arch/LoongArch.cpp @@ -0,0 +1,696 @@ +//===- LoongArch.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "OutputSections.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +namespace { +class LoongArch final : public TargetInfo { +public: + LoongArch(); + uint32_t calcEFlags() const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; + void writePltHeader(uint8_t *buf) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + RelType getDynRel(RelType type) const override; + RelExpr getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const override; + bool usesOnlyLowPageBits(RelType type) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; +}; +} // end anonymous namespace + +enum Op { + SUB_W = 0x00110000, + SUB_D = 0x00118000, + BREAK = 0x002a0000, + SRLI_W = 0x00448000, + SRLI_D = 0x00450000, + ADDI_W = 0x02800000, + ADDI_D = 0x02c00000, + ANDI = 0x03400000, + PCADDU12I = 0x1c000000, + LD_W = 0x28800000, + LD_D = 0x28c00000, + JIRL = 0x4c000000, +}; + +enum Reg { + R_ZERO = 0, + R_RA = 1, + R_TP = 2, + R_T0 = 12, + R_T1 = 13, + R_T2 = 14, + R_T3 = 15, +}; + +// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences +// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` +// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the +// "page") for the next instruction to add in the "page offset". 
(`pcalau12i` +// stands for something like "PC ALigned Add Upper that starts from the 12th +// bit, Immediate".) +// +// Here a "page" is in fact just another way to refer to the 12-bit range +// allowed by the immediate field of the addi/ld/st instructions, and not +// related to the system or the kernel's actual page size. The semantics +// happens to match the AArch64 `adrp`, so the concept of "page" is borrowed here. +static uint64_t getLoongArchPage(uint64_t p) { + return p & ~static_cast<uint64_t>(0xfff); +} + +static uint32_t lo12(uint32_t val) { return val & 0xfff; } + +// Calculate the adjusted page delta between dest and PC. +uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { + // Consider the large code model access pattern, of which the smaller code + // models' access patterns are a subset: + // + // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] + // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] + // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] + // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] + // {ldx,stx,add}.* dest, U, T + // + // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, + // with RQ, P, ZY, X and A representing the respective bitfields as unsigned + // integers.
We have: + // + // page(dest) = 0xZZZ'YYYYY'XXXXX'000 + // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 + // ---------------------------------- + // 0xddd'ccccc'bbbbb'000 + // + // Now consider the above pattern's actual effects: + // + // page(pc) 0xRRR'QQQQQ'PPPPP'000 + // pcalau12i + 0xiii'iiiii'bbbbb'000 + // addi + 0xjjj'jjjjj'kkkkk'AAA + // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 + // -------------------------------------------------- + // dest = U + T + // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) + // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A + // = (ZY<<32) + (X<<12) + A + // + // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k + // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k + // + // where i and k are terms representing the effect of b's and A's sign + // extension respectively. + // + // i = signed b < 0 ? -0x10000'0000 : 0 + // k = signed A < 0 ? -0x1000 : 0 + // + // The j term is a bit complex: it represents the higher half of + // sign-extended bits from A that are effectively lost if i == 0 but k != 0, + // due to overwriting by lu32i.d & lu52i.d. + // + // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 + // + // The actual effect of the instruction sequence before the final addition, + // i.e. 
our desired result value, is thus: + // + // result = (cd<<32) + (b<<12) + // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k + // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k + // = page(dest) - page(pc) - i - j - k + // + // when signed A >= 0 && signed b >= 0: + // + // i = j = k = 0 + // result = page(dest) - page(pc) + // + // when signed A >= 0 && signed b < 0: + // + // i = -0x10000'0000, j = k = 0 + // result = page(dest) - page(pc) + 0x10000'0000 + // + // when signed A < 0 && signed b >= 0: + // + // i = 0, j = 0x10000'0000, k = -0x1000 + // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 + // + // when signed A < 0 && signed b < 0: + // + // i = -0x10000'0000, j = 0, k = -0x1000 + // result = page(dest) - page(pc) + 0x1000 + uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); + bool negativeA = lo12(dest) > 0x7ff; + bool negativeB = (result & 0x8000'0000) != 0; + + if (negativeA) + result += 0x1000; + if (negativeA && !negativeB) + result -= 0x10000'0000; + else if (!negativeA && negativeB) + result += 0x10000'0000; + + return result; +} + +static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } + +static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { + return op | d | (j << 5) | (k << 10); +} + +// Extract bits v[begin:end], where range is inclusive. +static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { + return begin == 63 ? 
v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; +} + +static uint32_t setD5k16(uint32_t insn, uint32_t imm) { + uint32_t immLo = extractBits(imm, 15, 0); + uint32_t immHi = extractBits(imm, 20, 16); + return (insn & 0xfc0003e0) | (immLo << 10) | immHi; +} + +static uint32_t setD10k16(uint32_t insn, uint32_t imm) { + uint32_t immLo = extractBits(imm, 15, 0); + uint32_t immHi = extractBits(imm, 25, 16); + return (insn & 0xfc000000) | (immLo << 10) | immHi; +} + +static uint32_t setJ20(uint32_t insn, uint32_t imm) { + return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); +} + +static uint32_t setK12(uint32_t insn, uint32_t imm) { + return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); +} + +static uint32_t setK16(uint32_t insn, uint32_t imm) { + return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); +} + +static bool isJirl(uint32_t insn) { + return (insn & 0xfc000000) == JIRL; +} + +LoongArch::LoongArch() { + // The LoongArch ISA itself does not have a limit on page sizes. According to + // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is + // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to + // "unlimited". + // However, practically the maximum usable page size is constrained by the + // kernel implementation, and 64KiB is the biggest non-huge page size + // supported by Linux as of v6.4. The most widespread page size in use, + // though, is 16KiB. 
+ defaultCommonPageSize = 16384; + defaultMaxPageSize = 65536; + write32le(trapInstr.data(), BREAK); // break 0 + + copyRel = R_LARCH_COPY; + pltRel = R_LARCH_JUMP_SLOT; + relativeRel = R_LARCH_RELATIVE; + iRelativeRel = R_LARCH_IRELATIVE; + + if (config->is64) { + symbolicRel = R_LARCH_64; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; + tlsOffsetRel = R_LARCH_TLS_DTPREL64; + tlsGotRel = R_LARCH_TLS_TPREL64; + } else { + symbolicRel = R_LARCH_32; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; + tlsOffsetRel = R_LARCH_TLS_DTPREL32; + tlsGotRel = R_LARCH_TLS_TPREL32; + } + + gotRel = symbolicRel; + + // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map + gotPltHeaderEntriesNum = 2; + + pltHeaderSize = 32; + pltEntrySize = 16; + ipltEntrySize = 16; +} + +static uint32_t getEFlags(const InputFile *f) { + if (config->is64) + return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags; + return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags; +} + +static bool inputFileHasCode(const InputFile *f) { + for (const auto *sec : f->getSections()) + if (sec && sec->flags & SHF_EXECINSTR) + return true; + + return false; +} + +uint32_t LoongArch::calcEFlags() const { + // If there are only binary input files (from -b binary), use a + // value of 0 for the ELF header flags. + if (ctx->objectFiles.empty()) + return 0; + + uint32_t target = 0; + const InputFile *targetFile; + for (const InputFile *f : ctx->objectFiles) { + // Do not enforce ABI compatibility if the input file does not contain code. + // This is useful for allowing linkage with data-only object files produced + // with tools like objcopy, that have zero e_flags. + if (!inputFileHasCode(f)) + continue; + + // Take the first non-zero e_flags as the reference.
+ uint32_t flags = getEFlags(f); + if (target == 0 && flags != 0) { + target = flags; + targetFile = f; + } + + if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != + (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) + error(toString(f) + + ": cannot link object files with different ABI from " + + toString(targetFile)); + + // We cannot process psABI v1.x / object ABI v0 files (containing stack + // relocations), unlike ld.bfd. + // + // Instead of blindly accepting every v0 object and only failing at + // relocation processing time, just disallow interlink altogether. We + // don't expect significant usage of object ABI v0 in the wild (the old + // world may continue using object ABI v0 for a while, but as it's not + // binary-compatible with the upstream i.e. new-world ecosystem, it's not + // being considered here). + // + // There are briefly some new-world systems with object ABI v0 binaries too. + // It is because these systems were built before the new ABI was finalized. + // These are not supported either due to the extremely small number of them, + // and the few impacted users are advised to simply rebuild world or + // reinstall a recent system. + if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) + error(toString(f) + ": unsupported object file ABI version"); + } + + return target; +} + +int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + case R_LARCH_32: + case R_LARCH_TLS_DTPMOD32: + case R_LARCH_TLS_DTPREL32: + case R_LARCH_TLS_TPREL32: + return SignExtend64<32>(read32le(buf)); + case R_LARCH_64: + case R_LARCH_TLS_DTPMOD64: + case R_LARCH_TLS_DTPREL64: + case R_LARCH_TLS_TPREL64: + return read64le(buf); + case R_LARCH_RELATIVE: + case R_LARCH_IRELATIVE: + return config->is64 ? 
read64le(buf) : read32le(buf); + case R_LARCH_NONE: + case R_LARCH_JUMP_SLOT: + // These relocations are defined as not having an implicit addend. + return 0; + } +} + +void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { + if (config->is64) + write64le(buf, in.plt->getVA()); + else + write32le(buf, in.plt->getVA()); +} + +void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { + if (config->writeAddends) { + if (config->is64) + write64le(buf, s.getVA()); + else + write32le(buf, s.getVA()); + } +} + +void LoongArch::writePltHeader(uint8_t *buf) const { + // The LoongArch PLT is currently structured just like that of RISCV. + // Annoyingly, this means the PLT is still using `pcaddu12i` to perform + // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), + // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that + // is used everywhere else involving PC-relative operations in the LoongArch + // ELF psABI v2.00. + // + // The `pcrel_{hi20,lo12}` operators are illustrative only and not really + // supported by LoongArch assemblers. + // + // pcaddu12i $t2, %pcrel_hi20(.got.plt) + // sub.[wd] $t1, $t1, $t3 + // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve + // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] + // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) + // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] + // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map + // jr $t3 + uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); + uint32_t sub = config->is64 ? SUB_D : SUB_W; + uint32_t ld = config->is64 ? LD_D : LD_W; + uint32_t addi = config->is64 ? ADDI_D : ADDI_W; + uint32_t srli = config->is64 ? 
SRLI_D : SRLI_W; + write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); + write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); + write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); + write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); + write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); + write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); + write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); + write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); +} + +void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + // See the comment in writePltHeader for reason why pcaddu12i is used instead + // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. + // + // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) + // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) + // jirl $t1, $t3, 0 + // nop + uint32_t offset = sym.getGotPltVA() - pltEntryAddr; + write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); + write32le(buf + 4, + insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); + write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); + write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); +} + +RelType LoongArch::getDynRel(RelType type) const { + return type == target->symbolicRel ? type + : static_cast(R_LARCH_NONE); +} + +RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, + const uint8_t *loc) const { + switch (type) { + case R_LARCH_NONE: + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: + return R_NONE; + case R_LARCH_32: + case R_LARCH_64: + case R_LARCH_ABS_HI20: + case R_LARCH_ABS_LO12: + case R_LARCH_ABS64_LO20: + case R_LARCH_ABS64_HI12: + return R_ABS; + case R_LARCH_PCALA_LO12: + // We could just R_ABS, but the JIRL instruction reuses the relocation type + // for a different purpose. 
The questionable usage is part of glibc 2.37 + // libc_nonshared.a [1], which is linked into user programs, so we have to + // work around it for a while, even if a new relocation type may be + // introduced in the future [2]. + // + // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a + // [2]: https://github.com/loongson/la-abi-specs/pull/3 + return isJirl(read32le(loc)) ? R_PLT : R_ABS; + case R_LARCH_TLS_DTPREL32: + case R_LARCH_TLS_DTPREL64: + return R_DTPREL; + case R_LARCH_TLS_TPREL32: + case R_LARCH_TLS_TPREL64: + case R_LARCH_TLS_LE_HI20: + case R_LARCH_TLS_LE_LO12: + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_LE64_HI12: + return R_TPREL; + case R_LARCH_ADD8: + case R_LARCH_ADD16: + case R_LARCH_ADD32: + case R_LARCH_ADD64: + case R_LARCH_SUB8: + case R_LARCH_SUB16: + case R_LARCH_SUB32: + case R_LARCH_SUB64: + // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse + // the RelExpr to avoid code duplication. + return R_RISCV_ADD; + case R_LARCH_32_PCREL: + case R_LARCH_64_PCREL: + case R_LARCH_PCREL20_S2: + return R_PC; + case R_LARCH_B16: + case R_LARCH_B21: + case R_LARCH_B26: + return R_PLT_PC; + case R_LARCH_TLS_IE_PC_HI20: + case R_LARCH_TLS_IE64_PC_LO20: + case R_LARCH_TLS_IE64_PC_HI12: + config->hasTlsIe = true; + LLVM_FALLTHROUGH; + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_GOT64_PC_HI12: + return R_LOONGARCH_GOT_PAGE_PC; + case R_LARCH_GOT_PC_LO12: + case R_LARCH_TLS_IE_PC_LO12: + return R_LOONGARCH_GOT; + case R_LARCH_TLS_LD_PC_HI20: + case R_LARCH_TLS_GD_PC_HI20: + return R_LOONGARCH_TLSGD_PAGE_PC; + case R_LARCH_PCALA_HI20: + // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT + // anyway so why waste time checking only to get everything relaxed back to + // it? + // + // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want + // both the HI20 and LO12 to potentially refer to the PLT. 
But in reality + // the HI20 reloc appears earlier, and the relocs don't contain enough + // information to let us properly resolve semantics per symbol. + // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 + // relocs, hence it is nearly impossible to 100% accurately determine each + // HI20's "flavor" without taking big performance hits, in the presence of + // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far + // apart that relationship is not certain anymore), and programmer mistakes + // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). + // + // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark + // every HI20 reloc referring to the same symbol differently; this is not + // feasible with the current function signature of getRelExpr that doesn't + // allow for such inter-pass state. + // + // So, unfortunately we have to again workaround this quirk the same way as + // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only + // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later + // stage. + return R_LOONGARCH_PLT_PAGE_PC; + case R_LARCH_PCALA64_LO20: + case R_LARCH_PCALA64_HI12: + return R_LOONGARCH_PAGE_PC; + case R_LARCH_GOT_HI20: + case R_LARCH_GOT_LO12: + case R_LARCH_GOT64_LO20: + case R_LARCH_GOT64_HI12: + case R_LARCH_TLS_IE_HI20: + case R_LARCH_TLS_IE_LO12: + case R_LARCH_TLS_IE64_LO20: + case R_LARCH_TLS_IE64_HI12: + return R_GOT; + case R_LARCH_TLS_LD_HI20: + return R_TLSLD_GOT; + case R_LARCH_TLS_GD_HI20: + return R_TLSGD_GOT; + case R_LARCH_RELAX: + // LoongArch linker relaxation is not implemented yet. + return R_NONE; + + // Other known relocs that are explicitly unimplemented: + // + // - psABI v1 relocs that need a stateful stack machine to work, and not + // required when implementing psABI v2; + // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the + // two GNU vtable-related relocs). 
+ // + // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 + default: + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; + } +} + +bool LoongArch::usesOnlyLowPageBits(RelType type) const { + switch (type) { + default: + return false; + case R_LARCH_PCALA_LO12: + case R_LARCH_GOT_LO12: + case R_LARCH_GOT_PC_LO12: + case R_LARCH_TLS_IE_PC_LO12: + return true; + } +} + +void LoongArch::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { + case R_LARCH_32_PCREL: + checkInt(loc, val, 32, rel); + LLVM_FALLTHROUGH; + case R_LARCH_32: + case R_LARCH_TLS_DTPREL32: + write32le(loc, val); + return; + case R_LARCH_64: + case R_LARCH_TLS_DTPREL64: + case R_LARCH_64_PCREL: + write64le(loc, val); + return; + + case R_LARCH_PCREL20_S2: + checkInt(loc, val, 22, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setJ20(read32le(loc), val >> 2)); + return; + + case R_LARCH_B16: + checkInt(loc, val, 18, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setK16(read32le(loc), val >> 2)); + return; + + case R_LARCH_B21: + checkInt(loc, val, 23, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setD5k16(read32le(loc), val >> 2)); + return; + + case R_LARCH_B26: + checkInt(loc, val, 28, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setD10k16(read32le(loc), val >> 2)); + return; + + // Relocs intended for `addi`, `ld` or `st`. + case R_LARCH_PCALA_LO12: + // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 + // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes + // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly + // its immediate slot width is different too (16, not 12). + // In this case, process like an R_LARCH_B16, but without overflow checking + // and only taking the value's lowest 12 bits. 
+ if (isJirl(read32le(loc))) { + checkAlignment(loc, val, 4, rel); + val = SignExtend64<12>(val); + write32le(loc, setK16(read32le(loc), val >> 2)); + return; + } + LLVM_FALLTHROUGH; + case R_LARCH_ABS_LO12: + case R_LARCH_GOT_PC_LO12: + case R_LARCH_GOT_LO12: + case R_LARCH_TLS_LE_LO12: + case R_LARCH_TLS_IE_PC_LO12: + case R_LARCH_TLS_IE_LO12: + write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); + return; + + // Relocs intended for `lu12i.w` or `pcalau12i`. + case R_LARCH_ABS_HI20: + case R_LARCH_PCALA_HI20: + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT_HI20: + case R_LARCH_TLS_LE_HI20: + case R_LARCH_TLS_IE_PC_HI20: + case R_LARCH_TLS_IE_HI20: + case R_LARCH_TLS_LD_PC_HI20: + case R_LARCH_TLS_LD_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD_HI20: + write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); + return; + + // Relocs intended for `lu32i.d`. + case R_LARCH_ABS64_LO20: + case R_LARCH_PCALA64_LO20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_GOT64_LO20: + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_IE64_PC_LO20: + case R_LARCH_TLS_IE64_LO20: + write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); + return; + + // Relocs intended for `lu52i.d`. 
+ case R_LARCH_ABS64_HI12: + case R_LARCH_PCALA64_HI12: + case R_LARCH_GOT64_PC_HI12: + case R_LARCH_GOT64_HI12: + case R_LARCH_TLS_LE64_HI12: + case R_LARCH_TLS_IE64_PC_HI12: + case R_LARCH_TLS_IE64_HI12: + write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); + return; + + case R_LARCH_ADD8: + *loc += val; + return; + case R_LARCH_ADD16: + write16le(loc, read16le(loc) + val); + return; + case R_LARCH_ADD32: + write32le(loc, read32le(loc) + val); + return; + case R_LARCH_ADD64: + write64le(loc, read64le(loc) + val); + return; + case R_LARCH_SUB8: + *loc -= val; + return; + case R_LARCH_SUB16: + write16le(loc, read16le(loc) - val); + return; + case R_LARCH_SUB32: + write32le(loc, read32le(loc) - val); + return; + case R_LARCH_SUB64: + write64le(loc, read64le(loc) - val); + return; + + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: + // no-op + return; + + case R_LARCH_RELAX: + return; // Ignored (for now) + + default: + llvm_unreachable("unknown relocation"); + } +} + +TargetInfo *elf::getLoongArchTargetInfo() { + static LoongArch target; + return ⌖ +} diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index b37035d3e7429af6784134bafd32ad83b7530ff5..6d12da1f4b085d8b47ffd4db9535a4417158e743 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -13,6 +13,7 @@ add_lld_library(lldELF Arch/ARM.cpp Arch/AVR.cpp Arch/Hexagon.cpp + Arch/LoongArch.cpp Arch/Mips.cpp Arch/MipsArchTree.cpp Arch/MSP430.cpp diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 185757ad8c29a94775e0b8694a71972659ec2b46..daf16ed3ca444ad4c336df0c0201d7d8b672584c 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -148,6 +148,7 @@ static std::tuple parseEmulation(StringRef emul) { .Case("elf32lriscv", {ELF32LEKind, EM_RISCV}) .Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC}) .Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC}) + .Case("elf32loongarch", {ELF32LEKind, EM_LOONGARCH}) .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) 
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) .Case("elf64lriscv", {ELF64LEKind, EM_RISCV}) @@ -160,6 +161,7 @@ static std::tuple parseEmulation(StringRef emul) { .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) .Case("msp430elf", {ELF32LEKind, EM_MSP430}) + .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) @@ -1001,8 +1003,9 @@ static bool getIsRela(opt::InputArgList &args) { // Otherwise use the psABI defined relocation entry format. uint16_t m = config->emachine; - return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC || - m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64; + return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || + m == EM_LOONGARCH || m == EM_PPC || m == EM_PPC64 || m == EM_RISCV || + m == EM_X86_64; } static void parseClangOption(StringRef opt, const Twine &msg) { @@ -1575,7 +1578,8 @@ static void setConfigs(opt::InputArgList &args) { // have support for reading Elf_Rel addends, so we only enable for a subset. 
#ifndef NDEBUG bool checkDynamicRelocsDefault = m == EM_ARM || m == EM_386 || m == EM_MIPS || - m == EM_X86_64 || m == EM_RISCV; + m == EM_X86_64 || m == EM_RISCV || + m == EM_LOONGARCH; #else bool checkDynamicRelocsDefault = false; #endif diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index ba317d1e4bb06d373f4c63609769e77968b21ecd..2518bbaa9af6e6cb301915a7cc2fa4824e0c0d31 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1518,6 +1518,9 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { return EM_AVR; case Triple::hexagon: return EM_HEXAGON; + case Triple::loongarch32: + case Triple::loongarch64: + return EM_LOONGARCH; case Triple::mips: case Triple::mipsel: case Triple::mips64: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 8fe36eca6a4be91295854af2af08b9f50e2712d0..1420d8ce4e58ad9cfcc1b1689cf4cf717a62e53f 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -596,6 +596,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library // data and 0xf000 of the program's TLS segment. return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; + case EM_LOONGARCH: case EM_RISCV: return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); @@ -630,6 +631,14 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_GOT: case R_RELAX_TLS_GD_TO_IE_ABS: return sym.getGotVA() + a; + case R_LOONGARCH_GOT: + // The LoongArch TLS GD relocs reuse the R_LARCH_GOT_PC_LO12 reloc type + // for their page offsets. The arithmetics are different in the TLS case + // so we have to duplicate some logic here. + if (sym.needsTlsGd && type != R_LARCH_TLS_IE_PC_LO12) + // Like R_LOONGARCH_TLSGD_PAGE_PC but taking the absolute value. 
+ return in.got->getGlobalDynAddr(sym) + a; + return getRelocTargetVA(file, type, a, p, sym, R_GOT); case R_GOTONLY_PC: return in.got->getVA() + a - p; case R_GOTPLTONLY_PC: @@ -654,6 +663,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return sym.getGotVA() + a - p; + case R_LOONGARCH_GOT_PAGE_PC: + if (sym.needsTlsGd) + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + return getLoongArchPageDelta(sym.getGotVA() + a, p); case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: @@ -702,6 +715,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, *hiRel->sym, hiRel->expr); return 0; } + case R_LOONGARCH_PAGE_PC: + return getLoongArchPageDelta(sym.getVA(a), p); case R_PC: case R_ARM_PCA: { uint64_t dest; @@ -735,6 +750,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_PLT_PC: case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; + case R_LOONGARCH_PLT_PAGE_PC: + return getLoongArchPageDelta(sym.getPltVA() + a, p); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); case R_PPC32_PLTREL: @@ -795,6 +812,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA(); case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; + case R_LOONGARCH_TLSGD_PAGE_PC: + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 277c57505bb2f920b51068b7def01c75081ad01c..a617d55bfe000a08884d58b288f9e724111e6119 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -190,8 +190,8 @@ static bool isAbsoluteValue(const Symbol &sym) { // Returns true if Expr refers a PLT entry. 
static bool needsPlt(RelExpr expr) { - return oneof( - expr); + return oneof(expr); } // Returns true if Expr refers a GOT entry. Note that this function @@ -200,7 +200,8 @@ static bool needsPlt(RelExpr expr) { static bool needsGot(RelExpr expr) { return oneof(expr); + R_AARCH64_GOT_PAGE, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( + expr); } // True if this expression is of the form Sym - X, where X is a position in the @@ -208,12 +209,14 @@ static bool needsGot(RelExpr expr) { static bool isRelExpr(RelExpr expr) { return oneof(expr); + R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC, R_LOONGARCH_PAGE_PC>( + expr); } - static RelExpr toPlt(RelExpr expr) { switch (expr) { + case R_LOONGARCH_PAGE_PC: + return R_LOONGARCH_PLT_PAGE_PC; case R_PPC64_CALL: return R_PPC64_CALL_PLT; case R_PC: @@ -232,6 +235,8 @@ static RelExpr fromPlt(RelExpr expr) { case R_PLT_PC: case R_PPC32_PLTREL: return R_PC; + case R_LOONGARCH_PLT_PAGE_PC: + return R_LOONGARCH_PAGE_PC; case R_PPC64_CALL_PLT: return R_PPC64_CALL; case R_PLT: @@ -962,7 +967,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, - R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) + R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, + R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( + e)) return true; // These never do, except if the entire file is position dependent or if @@ -1048,7 +1055,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, bool canWrite = (sec.flags & SHF_WRITE) || !config->zText; if (canWrite) { RelType rel = target.getDynRel(type); - if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) { + if (oneof(expr) || + (rel == target.symbolicRel && !sym.isPreemptible)) { addRelativeReloc(sec, offset, sym, addend, expr, type); 
return; } else if (rel != 0) { @@ -1194,11 +1202,13 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, return 1; } - // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For - // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable + // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE + // relaxation. + // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable // relaxation as well. bool toExecRelax = !config->shared && config->emachine != EM_ARM && config->emachine != EM_HEXAGON && + config->emachine != EM_LOONGARCH && config->emachine != EM_RISCV && !c.file->ppc64DisableTLSRelax; @@ -1215,8 +1225,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, // being suitable for being dynamically loaded via dlopen. GOT[e0] is the // module index, with a special value of 0 for the current module. GOT[e1] is // unused. There only needs to be one module index entry. - if (oneof( - expr)) { + if (oneof(expr)) { // Local-Dynamic relocs can be relaxed to Local-Exec. if (toExecRelax) { c.relocations.push_back( @@ -1248,7 +1257,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, } if (oneof(expr)) { + R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, + R_LOONGARCH_TLSGD_PAGE_PC>(expr)) { if (!toExecRelax) { sym.needsTlsGd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); @@ -1270,8 +1280,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, return target->getTlsGdRelaxSkip(type); } - if (oneof(expr)) { + if (oneof(expr)) { // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally // defined. 
if (toExecRelax && isLocalInExecutable) { @@ -1431,7 +1441,9 @@ template void RelocationScanner::scanOne(RelTy *&i) { // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf in.mipsGot->addEntry(*sec.file, sym, addend, expr); - } else { + } else if (!sym.isTls() || config->emachine != EM_LOONGARCH) { + // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which + // case the `needsGot` flag shouldn't get set. sym.needsGot = true; } } else if (needsPlt(expr)) { diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index f70d255ba229ab7c52a95b27f793ef32c7da9647..f512047802ef1271bfa60b2f55d814159c7b01f7 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -103,6 +103,15 @@ enum RelExpr { R_PPC64_RELAX_GOT_PC, R_RISCV_ADD, R_RISCV_PC_INDIRECT, + // Same as R_PC but with page-aligned semantics. + R_LOONGARCH_PAGE_PC, + // Same as R_PLT_PC but with page-aligned semantics. + R_LOONGARCH_PLT_PAGE_PC, + // In addition to having page-aligned semantics, LoongArch GOT relocs are + // also reused for TLS, making the semantics differ from other architectures. + R_LOONGARCH_GOT, + R_LOONGARCH_GOT_PAGE_PC, + R_LOONGARCH_TLSGD_PAGE_PC, }; // Architecture-neutral representation of relocation. 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index e37faf90e132d36e2ebc7a107556932281af43b6..e1c3861022ff4479f322c334b916a8ec6dea5075 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -438,6 +438,8 @@ static std::pair parseBfdName(StringRef s) { .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) + .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) + .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) .Default({ELFNoneKind, EM_NONE}); } diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 7bc5121eabe4a480d6de61b9d776a4ce258e4d6b..6b9d67516cd748ee9bc1e2ab946ffb4b09bf6d32 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -62,6 +62,8 @@ TargetInfo *elf::getTarget() { return getAVRTargetInfo(); case EM_HEXAGON: return getHexagonTargetInfo(); + case EM_LOONGARCH: + return getLoongArchTargetInfo(); case EM_MIPS: switch (config->ekind) { case ELF32LEKind: diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 5ec2d85f64e83b6e7f25bfa4c6f702657e91a979..27ef93e00b8b51f2a00c9f0848591d87b136410d 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -185,6 +185,7 @@ TargetInfo *getAMDGPUTargetInfo(); TargetInfo *getARMTargetInfo(); TargetInfo *getAVRTargetInfo(); TargetInfo *getHexagonTargetInfo(); +TargetInfo *getLoongArchTargetInfo(); TargetInfo *getMSP430TargetInfo(); TargetInfo *getPPC64TargetInfo(); TargetInfo *getPPCTargetInfo(); @@ -229,6 +230,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); +uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); void riscvFinalizeRelax(int passes); class AArch64Relaxer { diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index b81eeb2232a2ce309cc5b1265929dac9c31630d0..b7ed1bc3fcd888238c7b21feb72556e389deff6c 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 
@@ -27,8 +27,8 @@ It accepts most of the same command line arguments and linker scripts as GNU linkers. .Pp .Nm -currently supports i386, x86-64, ARM, AArch64, PowerPC32, PowerPC64, -MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. +currently supports i386, x86-64, ARM, AArch64, LoongArch, PowerPC32, +PowerPC64, MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. .Nm acts as a Microsoft link.exe-compatible linker if invoked as .Nm lld-link diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s new file mode 100644 index 0000000000000000000000000000000000000000..343e836274a3e637ca9d1d4bfd1d521a9e3d6035 --- /dev/null +++ b/lld/test/ELF/emulation-loongarch.s @@ -0,0 +1,78 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch32 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s +# RUN: ld.lld -m elf32loongarch %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s +# RUN: echo 'OUTPUT_FORMAT(elf32-loongarch)' > %t.script +# RUN: ld.lld %t.script %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s + +# LA32: ElfHeader { +# LA32-NEXT: Ident { +# LA32-NEXT: Magic: (7F 45 4C 46) +# LA32-NEXT: Class: 32-bit (0x1) +# LA32-NEXT: DataEncoding: LittleEndian (0x1) +# LA32-NEXT: FileVersion: 1 +# LA32-NEXT: OS/ABI: SystemV (0x0) +# LA32-NEXT: ABIVersion: 0 +# LA32-NEXT: Unused: (00 00 00 00 00 00 00) +# LA32-NEXT: } +# LA32-NEXT: Type: Executable (0x2) +# LA32-NEXT: Machine: EM_LOONGARCH (0x102) +# LA32-NEXT: Version: 1 +# LA32-NEXT: Entry: +# LA32-NEXT: ProgramHeaderOffset: 0x34 +# LA32-NEXT: SectionHeaderOffset: +# LA32-NEXT: Flags [ (0x43) +# LA32-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) +# LA32-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) +# LA32-NEXT: ] +# LA32-NEXT: HeaderSize: 52 +# LA32-NEXT: ProgramHeaderEntrySize: 32 +# LA32-NEXT: ProgramHeaderCount: +# LA32-NEXT: 
SectionHeaderEntrySize: 40 +# LA32-NEXT: SectionHeaderCount: +# LA32-NEXT: StringTableSectionIndex: +# LA32-NEXT: } + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s +# RUN: ld.lld -m elf64loongarch %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s +# RUN: echo 'OUTPUT_FORMAT(elf64-loongarch)' > %t.script +# RUN: ld.lld %t.script %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s + +# LA64: ElfHeader { +# LA64-NEXT: Ident { +# LA64-NEXT: Magic: (7F 45 4C 46) +# LA64-NEXT: Class: 64-bit (0x2) +# LA64-NEXT: DataEncoding: LittleEndian (0x1) +# LA64-NEXT: FileVersion: 1 +# LA64-NEXT: OS/ABI: SystemV (0x0) +# LA64-NEXT: ABIVersion: 0 +# LA64-NEXT: Unused: (00 00 00 00 00 00 00) +# LA64-NEXT: } +# LA64-NEXT: Type: Executable (0x2) +# LA64-NEXT: Machine: EM_LOONGARCH (0x102) +# LA64-NEXT: Version: 1 +# LA64-NEXT: Entry: +# LA64-NEXT: ProgramHeaderOffset: 0x40 +# LA64-NEXT: SectionHeaderOffset: +# LA64-NEXT: Flags [ (0x43) +# LA64-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) +# LA64-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) +# LA64-NEXT: ] +# LA64-NEXT: HeaderSize: 64 +# LA64-NEXT: ProgramHeaderEntrySize: 56 +# LA64-NEXT: ProgramHeaderCount: +# LA64-NEXT: SectionHeaderEntrySize: 64 +# LA64-NEXT: SectionHeaderCount: +# LA64-NEXT: StringTableSectionIndex: +# LA64-NEXT: } + +.globl _start +_start: diff --git a/lld/test/ELF/loongarch-abs64.s b/lld/test/ELF/loongarch-abs64.s new file mode 100644 index 0000000000000000000000000000000000000000..4bfe7df9135a9358d752ef976f00d23cfd3f5713 --- /dev/null +++ b/lld/test/ELF/loongarch-abs64.s @@ -0,0 +1,64 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la64.o --defsym foo=0 --defsym bar=42 -o %t.la64.1 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.1 | FileCheck --check-prefix=CASE1 
%s +# CASE1: lu12i.w $a0, 0 +# CASE1-NEXT: ori $a0, $a0, 0 +# CASE1-NEXT: lu32i.d $a0, 0 +# CASE1-NEXT: lu52i.d $a0, $a0, 0 +# CASE1-NEXT: lu12i.w $a1, 0 +# CASE1-NEXT: ori $a1, $a1, 42 +# CASE1-NEXT: lu32i.d $a1, 0 +# CASE1-NEXT: lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0x12345678 --defsym bar=0x87654321 -o %t.la64.2 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.2 | FileCheck --check-prefix=CASE2 %s +# CASE2: lu12i.w $a0, 74565 +# CASE2-NEXT: ori $a0, $a0, 1656 +# CASE2-NEXT: lu32i.d $a0, 0 +# CASE2-NEXT: lu52i.d $a0, $a0, 0 +# CASE2-NEXT: lu12i.w $a1, -493996 +# CASE2-NEXT: ori $a1, $a1, 801 +# CASE2-NEXT: lu32i.d $a1, 0 +# CASE2-NEXT: lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0x12345fedcb678 --defsym bar=0xfedcb12345000 -o %t.la64.3 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.3 | FileCheck --check-prefix=CASE3 %s +# CASE3: lu12i.w $a0, -4661 +# CASE3-NEXT: ori $a0, $a0, 1656 +# CASE3-NEXT: lu32i.d $a0, 74565 +# CASE3-NEXT: lu52i.d $a0, $a0, 0 +# CASE3-NEXT: lu12i.w $a1, 74565 +# CASE3-NEXT: ori $a1, $a1, 0 +# CASE3-NEXT: lu32i.d $a1, -4661 +# CASE3-NEXT: lu52i.d $a1, $a1, 0 + +# RUN: ld.lld %t.la64.o --defsym foo=0xfffffeeeeeddd --defsym bar=0xfff00000f1111222 -o %t.la64.4 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.4 | FileCheck --check-prefix=CASE4 %s +# CASE4: lu12i.w $a0, -69906 +# CASE4-NEXT: ori $a0, $a0, 3549 +# CASE4-NEXT: lu32i.d $a0, -1 +# CASE4-NEXT: lu52i.d $a0, $a0, 0 +# CASE4-NEXT: lu12i.w $a1, -61167 +# CASE4-NEXT: ori $a1, $a1, 546 +# CASE4-NEXT: lu32i.d $a1, 0 +# CASE4-NEXT: lu52i.d $a1, $a1, -1 + +.global _start + +_start: +1: + lu12i.w $a0, %abs_hi20(foo) +.reloc 1b, R_LARCH_MARK_LA, foo + ori $a0, $a0, %abs_lo12(foo) + lu32i.d $a0, %abs64_lo20(foo) + lu52i.d $a0, $a0, %abs64_hi12(foo) + +2: + lu12i.w $a1, %abs_hi20(bar) +.reloc 1b, R_LARCH_MARK_LA, bar + ori $a1, $a1, %abs_lo12(bar) + lu32i.d $a1, %abs64_lo20(bar) + lu52i.d $a1, $a1, %abs64_hi12(bar) diff --git 
a/lld/test/ELF/loongarch-add-sub.s b/lld/test/ELF/loongarch-add-sub.s new file mode 100644 index 0000000000000000000000000000000000000000..63a3f7de179e6b849aa9f5fac0b6d2b598e5ad71 --- /dev/null +++ b/lld/test/ELF/loongarch-add-sub.s @@ -0,0 +1,36 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld --section-start=.text=0x1234567890 --section-start=.rodata=0x9876543210 %t.la64.o -o %t.la64 +# RUN: llvm-readelf -x .rodata %t.la64 | FileCheck --check-prefix=CHECK %s +# CHECK: section '.rodata': +# CHECK-NEXT: 0x9876543210 10325476 98badcfe 804602be 79ffffff +# CHECK-NEXT: 0x9876543220 804602be 804680 + +.text +.global _start +_start: +1: + break 0 + +.rodata +2: + .dword 0xfedcba9876543210 + +foo: + .dword 0 + .reloc foo, R_LARCH_ADD64, 1b + .reloc foo, R_LARCH_SUB64, 2b +bar: + .word 0 + .reloc bar, R_LARCH_ADD32, 1b + .reloc bar, R_LARCH_SUB32, 2b +baz: + .short 0 + .reloc baz, R_LARCH_ADD16, 1b + .reloc baz, R_LARCH_SUB16, 2b +quux: + .byte 0 + .reloc quux, R_LARCH_ADD8, 1b + .reloc quux, R_LARCH_SUB8, 2b diff --git a/lld/test/ELF/loongarch-branch.s b/lld/test/ELF/loongarch-branch.s new file mode 100644 index 0000000000000000000000000000000000000000..0090ae11454a3614b1d4dfb03ae0b3157d0284d6 --- /dev/null +++ b/lld/test/ELF/loongarch-branch.s @@ -0,0 +1,68 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la32 +# RUN: ld.lld %t.la64.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la64 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la32 | FileCheck %s --check-prefix=CHECK +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64 | 
FileCheck %s --check-prefix=CHECK +# CHECK: beq $zero, $zero, 4 +# CHECK: bne $zero, $zero, -4 +# CHECK: beqz $s8, 4 +# CHECK: bnez $s8, -4 +# CHECK: b 4 +# CHECK: bl -4 + +# RUN: ld.lld %t.la32.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la32.limits +# RUN: ld.lld %t.la64.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la64.limits +# RUN: llvm-objdump --no-show-raw-insn -d %t.la32.limits | FileCheck --check-prefix=LIMITS %s +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.limits | FileCheck --check-prefix=LIMITS %s +# LIMITS: beq $zero, $zero, 131068 +# LIMITS-NEXT: bne $zero, $zero, -131072 +# LIMITS: beqz $s8, 4194300 +# LIMITS-NEXT: bnez $s8, -4194304 +# LIMITS: b 134217724 +# LIMITS-NEXT: bl -134217728 + +# RUN: not ld.lld %t.la32.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-RANGE %s +# RUN: not ld.lld %t.la64.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_B16 out of range: 131072 is not in [-131072, 131071]; references foo16 +# ERROR-RANGE: error: [[FILE]]:(.text+0x4): relocation R_LARCH_B16 out of range: -131076 is not in [-131072, 131071]; references bar16 +# ERROR-RANGE: error: [[FILE]]:(.text+0x8): relocation R_LARCH_B21 out of range: 4194304 is not in [-4194304, 4194303]; references foo21 +# ERROR-RANGE: error: 
[[FILE]]:(.text+0xc): relocation R_LARCH_B21 out of range: -4194308 is not in [-4194304, 4194303]; references bar21 +# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_LARCH_B26 out of range: 134217728 is not in [-134217728, 134217727]; references foo26 +# ERROR-RANGE: error: [[FILE]]:(.text+0x14): relocation R_LARCH_B26 out of range: -134217732 is not in [-134217728, 134217727]; references bar26 + +# RUN: not ld.lld %t.la32.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-1 %s +# RUN: not ld.lld %t.la64.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-1 %s +# ERROR-ALIGN-1: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x1 is not aligned to 4 bytes +# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes +# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x1 is not aligned to 4 bytes +# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes +# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x1 is not aligned to 4 bytes +# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes + +# RUN: not ld.lld %t.la32.o --defsym foo16=b16+2 --defsym bar16=b16-2 --defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-2 %s +# RUN: not ld.lld %t.la64.o --defsym foo16=b16+2 --defsym bar16=b16-2 
--defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-2 %s +# ERROR-ALIGN-2: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x2 is not aligned to 4 bytes +# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes +# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x2 is not aligned to 4 bytes +# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes +# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x2 is not aligned to 4 bytes +# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes + +.global _start +.global b16 +.global b21 +.global b26 +_start: +b16: + beq $zero, $zero, foo16 + bne $zero, $zero, bar16 +b21: + beqz $s8, foo21 + bnez $s8, bar21 +b26: + b foo26 + bl bar26 diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test new file mode 100644 index 0000000000000000000000000000000000000000..44e5d03409a47ce4e887cfb89fa232e27d2202ac --- /dev/null +++ b/lld/test/ELF/loongarch-interlink.test @@ -0,0 +1,84 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: yaml2obj %t/blob.yaml -o %t/blob.o +# RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o + +## Check that binary input results in e_flags=0 output. 
+# RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out +# RUN: llvm-readobj -h %t/blob.out | FileCheck --check-prefix=EMPTY %s +# EMPTY: Flags [ +# EMPTY-NEXT: ] + +## Check that interlink between e_flags=0 and normal input (that contain code) +## is allowed. +## Also check that the e_flags logic work as intended regardless of input file +## order. +# RUN: ld.lld %t/blob.o %t/v1-lp64d.o -o %t/v1-lp64d.out +# RUN: ld.lld %t/v1-lp64s.o %t/blob.o -o %t/v1-lp64s.out +# RUN: llvm-readobj -h %t/v1-lp64d.out | FileCheck --check-prefix=V1-LP64D %s +# RUN: llvm-readobj -h %t/v1-lp64s.out | FileCheck --check-prefix=V1-LP64S %s +# V1-LP64D: Flags [ (0x43) +# V1-LP64S: Flags [ (0x41) + +## Check that interlink between different ABIs is disallowed. +# RUN: not ld.lld %t/v1-lp64s.o %t/v1-b-lp64d.o -o /dev/null 2>&1 | FileCheck -DFILE1=%t/v1-b-lp64d.o -DFILE2=%t/v1-lp64s.o --check-prefix=INTERLINK-ERR %s +# INTERLINK-ERR: error: [[FILE1]]: cannot link object files with different ABI from [[FILE2]] + +## Check that interlink between different object ABI versions is disallowed. 
+# RUN: not ld.lld %t/v0-lp64d.o %t/v1-b-lp64d.o %t/blob.o -o /dev/null 2>&1 | FileCheck -DFILE=%t/v0-lp64d.o --check-prefix=VERSION-ERR %s +# VERSION-ERR: error: [[FILE]]: unsupported object file ABI version + +#--- blob.bin +BLOB + +#--- blob.yaml +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + SectionHeaderStringTable: .strtab +Sections: + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + AddressAlign: 0x1 + Content: 424C4F42 +Symbols: + - Name: blob + Section: .data + Binding: STB_GLOBAL + +#--- v0-lp64d.yaml +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LOONGARCH_ABI_DOUBLE_FLOAT ] + SectionHeaderStringTable: .strtab +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x4 + Content: 0000a002 + +#--- start.s +.global _start +_start: + la $a0, blob + ld.b $a0, $a0, 0 + li.w $a7, 94 + syscall 0 + +#--- bar.s +bar: + move $a0, $zero + ret diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s new file mode 100644 index 0000000000000000000000000000000000000000..9df3492d18772ba59901e1351b9c669090a8581b --- /dev/null +++ b/lld/test/ELF/loongarch-pc-aligned.s @@ -0,0 +1,283 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.la64.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/extreme.s -o %t/extreme.o + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case1.la32 | FileCheck %s --check-prefix=CASE1 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case1.la64 | FileCheck %s 
--check-prefix=CASE1 +# CASE1: pcalau12i $a0, 0 +# CASE1-NEXT: ld.w $a0, $a0, 0 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la32 | FileCheck %s --check-prefix=CASE2 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la64 | FileCheck %s --check-prefix=CASE2 +# CASE2: pcalau12i $a0, -1 +# CASE2-NEXT: ld.w $a0, $a0, 0 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la32 | FileCheck %s --check-prefix=CASE3 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la64 | FileCheck %s --check-prefix=CASE3 +# CASE3: pcalau12i $a0, -1 +# CASE3-NEXT: ld.w $a0, $a0, 2047 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la32 | FileCheck %s --check-prefix=CASE4 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la64 | FileCheck %s --check-prefix=CASE4 +# CASE4: pcalau12i $a0, 0 +# CASE4-NEXT: ld.w $a0, $a0, -2048 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la32 | FileCheck %s --check-prefix=CASE5 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la64 | FileCheck %s --check-prefix=CASE5 +# CASE5: pcalau12i $a0, 1 +# CASE5-NEXT: ld.w $a0, $a0, 4 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12800 
--section-start=.text=0x11ffc -o %t/case6.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12800 --section-start=.text=0x11ffc -o %t/case6.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la32 | FileCheck %s --check-prefix=CASE6 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la64 | FileCheck %s --check-prefix=CASE6 +# CASE6: pcalau12i $a0, 2 +# CASE6-NEXT: ld.w $a0, $a0, -2048 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la32 | FileCheck %s --check-prefix=CASE7 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la64 | FileCheck %s --check-prefix=CASE7 +# CASE7: pcalau12i $a0, 524287 +# CASE7-NEXT: ld.w $a0, $a0, 291 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la32 | FileCheck %s --check-prefix=CASE8 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la64 | FileCheck %s --check-prefix=CASE8 +# CASE8: pcalau12i $a0, -524288 +# CASE8-NEXT: ld.w $a0, $a0, -1348 + +# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la32 +# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la32 | FileCheck %s --check-prefix=CASE9 +# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la64 | FileCheck %s --check-prefix=CASE9 +# CASE9: pcalau12i $a0, -524288 +# CASE9-NEXT: ld.w $a0, $a0, 291 + +## page delta = 0x4443333322222000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x22222 = 139810 +## %pc64_lo20 = 0x33333 = 209715 +## %pc64_hi12 = 0x444 = 1092 +# RUN: 
ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0 +# EXTREME0: addi.d $t0, $zero, 273 +# EXTREME0-NEXT: pcalau12i $t1, 139810 +# EXTREME0-NEXT: lu32i.d $t0, 209715 +# EXTREME0-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x4443333222223000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x22223 = 139811 +## %pc64_lo20 = 0x33332 = 209714 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1 +# EXTREME1: addi.d $t0, $zero, -1912 +# EXTREME1-NEXT: pcalau12i $t1, 139811 +# EXTREME1-NEXT: lu32i.d $t0, 209714 +# EXTREME1-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x4443333499999000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x99999 = -419431 +## %pc64_lo20 = 0x33334 = 209716 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme2 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 +# EXTREME2: addi.d $t0, $zero, 273 +# EXTREME2-NEXT: pcalau12i $t1, -419431 +# EXTREME2-NEXT: lu32i.d $t0, 209716 +# EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x444333339999a000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x9999a = -419430 +## %pc64_lo20 = 0x33333 = 209715 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 +# EXTREME3: addi.d $t0, $zero, -1912 +# EXTREME3-NEXT: pcalau12i $t1, -419430 +# EXTREME3-NEXT: 
lu32i.d $t0, 209715 +# EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x444aaaaa22222000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x22222 = 139810 +## %pc64_lo20 = 0xaaaaa = -349526 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme4 | FileCheck %s --check-prefix=EXTREME4 +# EXTREME4: addi.d $t0, $zero, 273 +# EXTREME4-NEXT: pcalau12i $t1, 139810 +# EXTREME4-NEXT: lu32i.d $t0, -349526 +# EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x444aaaa922223000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x22223 = 139811 +## %pc64_lo20 = 0xaaaa9 = -349527 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 +# EXTREME5: addi.d $t0, $zero, -1912 +# EXTREME5-NEXT: pcalau12i $t1, 139811 +# EXTREME5-NEXT: lu32i.d $t0, -349527 +# EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x444aaaab99999000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x99999 = -419431 +## %pc64_lo20 = 0xaaaab = -349525 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 +# EXTREME6: addi.d $t0, $zero, 273 +# EXTREME6-NEXT: pcalau12i $t1, -419431 +# EXTREME6-NEXT: lu32i.d $t0, -349525 +# EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0x444aaaaa9999a000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x9999a = -419430 +## %pc64_lo20 = 0xaaaaa = -349526 +## %pc64_hi12 = 0x444 = 1092 +# RUN: ld.lld %t/extreme.o 
--section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 +# EXTREME7: addi.d $t0, $zero, -1912 +# EXTREME7-NEXT: pcalau12i $t1, -419430 +# EXTREME7-NEXT: lu32i.d $t0, -349526 +# EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 + +## page delta = 0xbbb3333322222000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x22222 = 139810 +## %pc64_lo20 = 0x33333 = 209715 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 +# EXTREME8: addi.d $t0, $zero, 273 +# EXTREME8-NEXT: pcalau12i $t1, 139810 +# EXTREME8-NEXT: lu32i.d $t0, 209715 +# EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbb3333222223000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x22223 = 139811 +## %pc64_lo20 = 0x33332 = 209714 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme9 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 +# EXTREME9: addi.d $t0, $zero, -1912 +# EXTREME9-NEXT: pcalau12i $t1, 139811 +# EXTREME9-NEXT: lu32i.d $t0, 209714 +# EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbb3333499999000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x99999 = -419431 +## %pc64_lo20 = 0x33334 = 209716 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme10 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 +# EXTREME10: addi.d $t0, $zero, 273 +# EXTREME10-NEXT: pcalau12i $t1, -419431 +# EXTREME10-NEXT: lu32i.d $t0, 
209716 +# EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbb333339999a000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x9999a = -419430 +## %pc64_lo20 = 0x33333 = 209715 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme11 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 +# EXTREME11: addi.d $t0, $zero, -1912 +# EXTREME11-NEXT: pcalau12i $t1, -419430 +# EXTREME11-NEXT: lu32i.d $t0, 209715 +# EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbbaaaaa22222000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x22222 = 139810 +## %pc64_lo20 = 0xaaaaa = -349526 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 +# EXTREME12: addi.d $t0, $zero, 273 +# EXTREME12-NEXT: pcalau12i $t1, 139810 +# EXTREME12-NEXT: lu32i.d $t0, -349526 +# EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbbaaaa922223000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x22223 = 139811 +## %pc64_lo20 = 0xaaaa9 = -349527 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 +# EXTREME13: addi.d $t0, $zero, -1912 +# EXTREME13-NEXT: pcalau12i $t1, 139811 +# EXTREME13-NEXT: lu32i.d $t0, -349527 +# EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbbaaaab99999000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x99999 = -419431 +## %pc64_lo20 = 0xaaaab = -349525 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o 
--section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 +# EXTREME14: addi.d $t0, $zero, 273 +# EXTREME14-NEXT: pcalau12i $t1, -419431 +# EXTREME14-NEXT: lu32i.d $t0, -349525 +# EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 + +## page delta = 0xbbbaaaaa9999a000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x9999a = -419430 +## %pc64_lo20 = 0xaaaaa = -349526 +## %pc64_hi12 = 0xbbb = -1093 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 +# EXTREME15: addi.d $t0, $zero, -1912 +# EXTREME15-NEXT: pcalau12i $t1, -419430 +# EXTREME15-NEXT: lu32i.d $t0, -349526 +# EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 + +#--- a.s +.rodata +x: +.word 10 +.text +.global _start +_start: + pcalau12i $a0, %pc_hi20(x) + ld.w $a0, $a0, %pc_lo12(x) + +#--- extreme.s +.rodata +x: +.word 10 +.text +.global _start +_start: + addi.d $t0, $zero, %pc_lo12(x) + pcalau12i $t1, %pc_hi20(x) + lu32i.d $t0, %pc64_lo20(x) + lu52i.d $t0, $t0, %pc64_hi12(x) diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s new file mode 100644 index 0000000000000000000000000000000000000000..991f8fbe974f45c86303c135572ddf94b2381cc9 --- /dev/null +++ b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s @@ -0,0 +1,60 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/a.s -o %t/a.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.la64.o + +# RUN: ld.lld %t/a.la32.o -shared -T %t/a.t -o %t/a.la32.so +# RUN: ld.lld %t/a.la64.o -shared -T %t/a.t -o %t/a.la64.so + +# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la32.so | FileCheck 
--check-prefixes=DIS,DIS32 %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la64.so | FileCheck --check-prefixes=DIS,DIS64 %s + +## PLT should be present in this case. +# DIS: Disassembly of section .plt: +# DIS: <.plt>: +# DIS: 234020: pcaddu12i $t3, 510 +# DIS32-NEXT: ld.w $t3, $t3, 84 +# DIS64-NEXT: ld.d $t3, $t3, 184 +# DIS-NEXT: jirl $t1, $t3, 0 +# DIS-NEXT: nop + +# DIS: Disassembly of section .text: +# DIS: : +# DIS-NEXT: nop +# DIS-NEXT: nop +# DIS-NEXT: nop +# DIS-NEXT: pcalau12i $t0, -510 +# DIS-NEXT: jirl $zero, $t0, 32 + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/error.s -o %t/error.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/error.s -o %t/error.la64.o +# RUN: not ld.lld %t/error.la32.o -shared -o %t/error.la32.so 2>&1 | FileCheck --check-prefix=ERR %s +# RUN: not ld.lld %t/error.la64.o -shared -o %t/error.la64.so 2>&1 | FileCheck --check-prefix=ERR %s +# ERR: error: relocation R_LARCH_PCALA_LO12 cannot be used against symbol 'bar'; recompile with -fPIC + +#--- a.t +SECTIONS { + .plt 0x234000: { *(.plt) } + .text 0x432000: { *(.text) } +} + +#--- a.s +.p2align 12 +.global foo +foo: +## The nops are for pushing the relocs off page boundary, to better see the +## page-aligned semantics in action. + nop + nop + nop + ## The offsets should be -510 (0x234 - 0x432) and 32 (PLT header size + 0) + ## respectively. 
+ pcalau12i $t0, %pc_hi20(bar) + jirl $zero, $t0, %pc_lo12(bar) + +#--- error.s +.global foo +foo: + pcalau12i $t0, %pc_hi20(bar) + ld.w $t0, $t0, %pc_lo12(bar) diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl.s b/lld/test/ELF/loongarch-pcala-lo12-jirl.s new file mode 100644 index 0000000000000000000000000000000000000000..1a03152aaa2afc40afff23422dd9aa39fd128282 --- /dev/null +++ b/lld/test/ELF/loongarch-pcala-lo12-jirl.s @@ -0,0 +1,42 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o -o %t.la32 +# RUN: ld.lld %t.la64.o -o %t.la64 +# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck %s +# CHECK: pcalau12i $t0, -1 +# CHECK-NEXT: jirl $ra, $t0, 564 +# CHECK-NEXT: pcalau12i $t0, 0 +# CHECK-NEXT: jirl $zero, $t0, -1348 + +## PLT shouldn't get generated in this case. +# CHECK-NOT: Disassembly of section .plt: + +.p2align 12 +.org 0x234 +.global foo +foo: + li.w $a0, 42 + ret + +.org 0xabc +.global bar +bar: + li.w $a7, 94 + syscall 0 + +.org 0x1000 +.global _start +_start: +## The nops are for pushing the relocs off page boundary, to better see the +## page-aligned semantics in action. 
+ nop + nop + nop + pcalau12i $t0, %pc_hi20(foo) + jirl $ra, $t0, %pc_lo12(foo) + pcalau12i $t0, %pc_hi20(bar) + jirl $zero, $t0, %pc_lo12(bar) diff --git a/lld/test/ELF/loongarch-pcrel20-s2.s b/lld/test/ELF/loongarch-pcrel20-s2.s new file mode 100644 index 0000000000000000000000000000000000000000..06707b6ad58dd7f8678473c52479c8bc6d54f9e7 --- /dev/null +++ b/lld/test/ELF/loongarch-pcrel20-s2.s @@ -0,0 +1,32 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o + +# RUN: ld.lld %t.la32.o --section-start=.text=0x20000 --section-start=.data=0x20008 -o %t.la32.1 +# RUN: ld.lld %t.la64.o --section-start=.text=0x20000 --section-start=.data=0x20008 -o %t.la64.1 +# RUN: llvm-objdump --no-show-raw-insn -d %t.la32.1 | FileCheck --match-full-lines %s +# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.1 | FileCheck --match-full-lines %s +# CHECK: 20000: pcaddi $t0, 2 + +# RUN: not ld.lld %t.la32.o --section-start=.text=0x20000 --section-start=.data=0x220000 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-RANGE %s +# RUN: not ld.lld %t.la64.o --section-start=.text=0x20000 --section-start=.data=0x220000 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_PCREL20_S2 out of range: 2097152 is not in [-2097152, 2097151] + +# RUN: not ld.lld %t.la32.o --section-start=.text=0x20000 --section-start=.data=0x40001 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN %s +# RUN: not ld.lld %t.la64.o --section-start=.text=0x20000 --section-start=.data=0x40001 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN %s +# ERROR-ALIGN: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_PCREL20_S2: 0x20001 is not aligned to 4 bytes + +.global _start + 
+_start: +1: + pcaddi $t0, 0 + .reloc 1b, R_LARCH_PCREL20_S2, .data + +.data + .word 0 diff --git a/lld/test/ELF/loongarch-plt.s b/lld/test/ELF/loongarch-plt.s new file mode 100644 index 0000000000000000000000000000000000000000..82af53d39e730f777b38e64ebf96d8beaae45012 --- /dev/null +++ b/lld/test/ELF/loongarch-plt.s @@ -0,0 +1,108 @@ +# REQUIRES: loongarch +# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t1.s -o %t1.32.o +# RUN: ld.lld -shared %t1.32.o -soname=t1.32.so -o %t1.32.so +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o +# RUN: ld.lld %t.32.o %t1.32.so -z separate-code -o %t.32 +# RUN: llvm-readelf -S -s %t.32 | FileCheck --check-prefixes=SEC,NM %s +# RUN: llvm-readobj -r %t.32 | FileCheck --check-prefix=RELOC32 %s +# RUN: llvm-readelf -x .got.plt %t.32 | FileCheck --check-prefix=GOTPLT32 %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=DIS,DIS32 %s + +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t1.s -o %t1.64.o +# RUN: ld.lld -shared %t1.64.o -soname=t1.64.so -o %t1.64.so +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o +# RUN: ld.lld %t.64.o %t1.64.so -z separate-code -o %t.64 +# RUN: llvm-readelf -S -s %t.64 | FileCheck --check-prefixes=SEC,NM %s +# RUN: llvm-readobj -r %t.64 | FileCheck --check-prefix=RELOC64 %s +# RUN: llvm-readelf -x .got.plt %t.64 | FileCheck --check-prefix=GOTPLT64 %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=DIS,DIS64 %s + +# SEC: .plt PROGBITS {{0*}}00020020 + +## A canonical PLT has a non-zero st_value. bar and weak are called but their +## addresses are not taken, so a canonical PLT is not necessary. +# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar +# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak + +## The .got.plt slots relocated by .rela.plt point to .plt +## This is required by glibc. 
+# RELOC32: .rela.plt { +# RELOC32-NEXT: 0x40070 R_LARCH_JUMP_SLOT bar 0x0 +# RELOC32-NEXT: 0x40074 R_LARCH_JUMP_SLOT weak 0x0 +# RELOC32-NEXT: } +# GOTPLT32: section '.got.plt' +# GOTPLT32-NEXT: 0x00040068 00000000 00000000 20000200 20000200 + +# RELOC64: .rela.plt { +# RELOC64-NEXT: 0x400E0 R_LARCH_JUMP_SLOT bar 0x0 +# RELOC64-NEXT: 0x400E8 R_LARCH_JUMP_SLOT weak 0x0 +# RELOC64-NEXT: } +# GOTPLT64: section '.got.plt' +# GOTPLT64-NEXT: 0x000400d0 00000000 00000000 00000000 00000000 +# GOTPLT64-NEXT: 0x000400e0 20000200 00000000 20000200 00000000 + +# DIS: <_start>: +## Direct call +## foo - . = 0x20010-0x20000 = 16 +# DIS-NEXT: 20000: bl 16 +## bar@plt - . = 0x20040-0x20004 = 60 +# DIS-NEXT: 20004: bl 60 +## bar@plt - . = 0x20040-0x20008 = 56 +# DIS-NEXT: 20008: bl 56 +## weak@plt - . = 0x20050-0x2000c = 68 +# DIS-NEXT: 2000c: bl 68 +# DIS: : +# DIS-NEXT: 20010: + +# DIS: Disassembly of section .plt: +# DIS: <.plt>: +## 32-bit: .got.plt - .plt = 0x40068 - 0x20020 = 4096*32+72 +# DIS32-NEXT: pcaddu12i $t2, 32 +# DIS32-NEXT: sub.w $t1, $t1, $t3 +# DIS32-NEXT: ld.w $t3, $t2, 72 +# DIS32-NEXT: addi.w $t1, $t1, -44 +# DIS32-NEXT: addi.w $t0, $t2, 72 +# DIS32-NEXT: srli.w $t1, $t1, 2 +# DIS32-NEXT: ld.w $t0, $t0, 4 +# DIS32-NEXT: jr $t3 + +## 64-bit: .got.plt - .plt = 0x400d0 - 0x20020 = 4096*32+176 +# DIS64-NEXT: pcaddu12i $t2, 32 +# DIS64-NEXT: sub.d $t1, $t1, $t3 +# DIS64-NEXT: ld.d $t3, $t2, 176 +# DIS64-NEXT: addi.d $t1, $t1, -44 +# DIS64-NEXT: addi.d $t0, $t2, 176 +# DIS64-NEXT: srli.d $t1, $t1, 1 +# DIS64-NEXT: ld.d $t0, $t0, 8 +# DIS64-NEXT: jr $t3 + +## 32-bit: &.got.plt[bar]-. = 0x40070-0x20040 = 4096*32+48 +## 64-bit: &.got.plt[bar]-. = 0x400e0-0x20040 = 4096*32+160 +# DIS: 20040: pcaddu12i $t3, 32 +# DIS32-NEXT: ld.w $t3, $t3, 48 +# DIS64-NEXT: ld.d $t3, $t3, 160 +# DIS-NEXT: jirl $t1, $t3, 0 +# DIS-NEXT: nop + +## 32-bit: &.got.plt[weak]-. = 0x40074-0x20050 = 4096*32+36 +## 64-bit: &.got.plt[weak]-. 
= 0x400e8-0x20050 = 4096*32+152 +# DIS: 20050: pcaddu12i $t3, 32 +# DIS32-NEXT: ld.w $t3, $t3, 36 +# DIS64-NEXT: ld.d $t3, $t3, 152 +# DIS-NEXT: jirl $t1, $t3, 0 +# DIS-NEXT: nop + +.global _start, foo, bar +.weak weak + +_start: + bl foo + bl bar + bl %plt(bar) + bl weak + +## foo is local and non-preemptible, no PLT is generated. +foo: + ret diff --git a/lld/test/ELF/loongarch-reloc-pic.s b/lld/test/ELF/loongarch-reloc-pic.s new file mode 100644 index 0000000000000000000000000000000000000000..b23ad55a2523853169b04840ded22b30e6339fad --- /dev/null +++ b/lld/test/ELF/loongarch-reloc-pic.s @@ -0,0 +1,44 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/64.s -o %t/64.o +# RUN: ld.lld -shared %t/32.o -o %t/32.so +# RUN: llvm-nm %t/32.so | FileCheck --check-prefix=NM32 %s +# RUN: llvm-readobj -r %t/32.so | FileCheck --check-prefix=RELOC32 %s +# RUN: ld.lld -shared %t/64.o -o %t/64.so +# RUN: llvm-nm %t/64.so | FileCheck --check-prefix=NM64 %s +# RUN: llvm-readobj -r %t/64.so | FileCheck --check-prefix=RELOC64 %s + +## R_LARCH_32 and R_LARCH_64 are absolute relocation types. +## In PIC mode, they create relative relocations if the symbol is non-preemptable. 
+ +# NM32: 000301fc d b +# NM64: 00030350 d b + +# RELOC32: .rela.dyn { +# RELOC32-NEXT: 0x301FC R_LARCH_RELATIVE - 0x301FC +# RELOC32-NEXT: 0x301F8 R_LARCH_32 a 0 +# RELOC32-NEXT: } +# RELOC64: .rela.dyn { +# RELOC64-NEXT: 0x30350 R_LARCH_RELATIVE - 0x30350 +# RELOC64-NEXT: 0x30348 R_LARCH_64 a 0 +# RELOC64-NEXT: } + +#--- 32.s +.globl a, b +.hidden b + +.data +.long a +b: +.long b + +#--- 64.s +.globl a, b +.hidden b + +.data +.quad a +b: +.quad b diff --git a/lld/test/ELF/loongarch-tls-gd-edge-case.s b/lld/test/ELF/loongarch-tls-gd-edge-case.s new file mode 100644 index 0000000000000000000000000000000000000000..9f25f10c73b44f4f585c744d9dec767dbb8b4337 --- /dev/null +++ b/lld/test/ELF/loongarch-tls-gd-edge-case.s @@ -0,0 +1,46 @@ +# REQUIRES: loongarch + +## Edge case: when a TLS symbol is being accessed in both GD and IE manners, +## correct reloc behavior should be preserved for both kinds of accesses. + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.la32.o +# RUN: ld.lld %t.la32.o -shared -o %t.la32 +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.la64.o +# RUN: ld.lld %t.la64.o -shared -o %t.la64 + +# RUN: llvm-readelf -Wr %t.la32 | FileCheck --check-prefix=LA32-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck --check-prefix=LA32 %s + +# RUN: llvm-readelf -Wr %t.la64 | FileCheck --check-prefix=LA64-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck --check-prefix=LA64 %s + +# LA32-REL-NOT: R_LARCH_32 +# LA32-REL: 0002023c 00000206 R_LARCH_TLS_DTPMOD32 00000000 y + 0 +# LA32-REL-NEXT: 00020240 00000208 R_LARCH_TLS_DTPREL32 00000000 y + 0 +# LA32-REL-NEXT: 00020244 0000020a R_LARCH_TLS_TPREL32 00000000 y + 0 + +# LA64-REL-NOT: R_LARCH_64 +# LA64-REL: 00000000000203a0 0000000200000007 R_LARCH_TLS_DTPMOD64 0000000000000000 y + 0 +# LA64-REL-NEXT: 00000000000203a8 0000000200000009 R_LARCH_TLS_DTPREL64 0000000000000000 y + 0 +# LA64-REL-NEXT: 00000000000203b0 000000020000000b R_LARCH_TLS_TPREL64 
0000000000000000 y + 0 + +# LA32: 101d4: pcalau12i $a0, 16 +# LA32-NEXT: ld.w $a0, $a0, 580 +# LA32-NEXT: pcalau12i $a1, 16 +# LA32-NEXT: addi.w $a1, $a1, 572 + +# LA64: 102e0: pcalau12i $a0, 16 +# LA64-NEXT: ld.d $a0, $a0, 944 +# LA64-NEXT: pcalau12i $a1, 16 +# LA64-NEXT: addi.d $a1, $a1, 928 + +.global _start +_start: +la.tls.ie $a0, y # should refer to the GOT entry relocated by the R_LARCH_TLS_TPRELnn record +la.tls.gd $a1, y # should refer to the GOT entry relocated by the R_LARCH_TLS_DTPMODnn record + +.section .tbss,"awT",@nobits +.global y +y: +.word 0 +.size y, 4 diff --git a/lld/test/ELF/loongarch-tls-gd.s b/lld/test/ELF/loongarch-tls-gd.s new file mode 100644 index 0000000000000000000000000000000000000000..2aecb44c17a343329a9790c5127a8e63d28e5ac3 --- /dev/null +++ b/lld/test/ELF/loongarch-tls-gd.s @@ -0,0 +1,136 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not +## relaxed, dynamic relocations can be omitted for GD->LE relaxation. 
+ +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/bc.s -o %t/bc.32.o +# RUN: ld.lld -shared -soname=bc.so %t/bc.32.o -o %t/bc.32.so +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.64.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/bc.s -o %t/bc.64.o +# RUN: ld.lld -shared -soname=bc.so %t/bc.64.o -o %t/bc.64.so +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o + +## LA32 GD +# RUN: ld.lld -shared %t/a.32.o %t/bc.32.o -o %t/gd.32.so +# RUN: llvm-readobj -r %t/gd.32.so | FileCheck --check-prefix=GD32-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.so | FileCheck --check-prefix=GD32 %s + +## LA32 GD -> LE +# RUN: ld.lld %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le.32 +# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s +# RUN: ld.lld -pie %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le-pie.32 +# RUN: llvm-readelf -r %t/le-pie.32 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le-pie.32 | FileCheck --check-prefix=LE32-GOT %s + +## LA32 GD -> IE +# RUN: ld.lld %t/a.32.o %t/bc.32.so %t/tga.32.o -o %t/ie.32 +# RUN: llvm-readobj -r %t/ie.32 | FileCheck --check-prefix=IE32-REL %s +# RUN: llvm-readelf -x .got %t/ie.32 | FileCheck --check-prefix=IE32-GOT %s + +## LA64 GD +# RUN: ld.lld -shared %t/a.64.o %t/bc.64.o -o %t/gd.64.so +# RUN: llvm-readobj -r %t/gd.64.so | FileCheck --check-prefix=GD64-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.so | FileCheck --check-prefix=GD64 %s + +## LA64 GD -> LE +# RUN: ld.lld %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le.64 +# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s +# RUN: ld.lld -pie %t/a.64.o %t/bc.64.o 
%t/tga.64.o -o %t/le-pie.64 +# RUN: llvm-readelf -r %t/le-pie.64 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le-pie.64 | FileCheck --check-prefix=LE64-GOT %s + +## LA64 GD -> IE +# RUN: ld.lld %t/a.64.o %t/bc.64.so %t/tga.64.o -o %t/ie.64 +# RUN: llvm-readobj -r %t/ie.64 | FileCheck --check-prefix=IE64-REL %s +# RUN: llvm-readelf -x .got %t/ie.64 | FileCheck --check-prefix=IE64-GOT %s + +# GD32-REL: .rela.dyn { +# GD32-REL-NEXT: 0x20310 R_LARCH_TLS_DTPMOD32 a 0x0 +# GD32-REL-NEXT: 0x20314 R_LARCH_TLS_DTPREL32 a 0x0 +# GD32-REL-NEXT: 0x20318 R_LARCH_TLS_DTPMOD32 b 0x0 +# GD32-REL-NEXT: 0x2031C R_LARCH_TLS_DTPREL32 b 0x0 +# GD32-REL-NEXT: } + +## &DTPMOD(a) - . = 0x20310 - 0x10250: 0x10 pages, page offset 0x310 +# GD32: 10250: pcalau12i $a0, 16 +# GD32-NEXT: addi.w $a0, $a0, 784 +# GD32-NEXT: bl 56 + +## &DTPMOD(b) - . = 0x20318 - 0x1025c: 0x10 pages, page offset 0x318 +# GD32: 1025c: pcalau12i $a0, 16 +# GD32-NEXT: addi.w $a0, $a0, 792 +# GD32-NEXT: bl 44 + +# GD64-REL: .rela.dyn { +# GD64-REL-NEXT: 0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0 +# GD64-REL-NEXT: 0x204C8 R_LARCH_TLS_DTPREL64 a 0x0 +# GD64-REL-NEXT: 0x204D0 R_LARCH_TLS_DTPMOD64 b 0x0 +# GD64-REL-NEXT: 0x204D8 R_LARCH_TLS_DTPREL64 b 0x0 +# GD64-REL-NEXT: } + +## &DTPMOD(a) - . = 0x204c0 - 0x10398: 0x10 pages, page offset 0x4c0 +# GD64: 10398: pcalau12i $a0, 16 +# GD64-NEXT: addi.d $a0, $a0, 1216 +# GD64-NEXT: bl 48 + +## &DTPMOD(b) - . 
= 0x204d0 - 0x103a4: 0x10 pages, page offset 0x4d0 +# GD64: 103a4: pcalau12i $a0, 16 +# GD64-NEXT: addi.d $a0, $a0, 1232 +# GD64-NEXT: bl 36 + +# NOREL: no relocations + +## .got contains pre-populated values: [a@dtpmod, a@dtprel, b@dtpmod, b@dtprel] +## a@dtprel = st_value(a) = 0x8 +## b@dtprel = st_value(b) = 0xc +# LE32-GOT: section '.got': +# LE32-GOT-NEXT: 0x[[#%x,A:]] 01000000 08000000 01000000 0c000000 +# LE64-GOT: section '.got': +# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 08000000 00000000 +# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 0c000000 00000000 + +## a is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants. +## b is external - DTPMOD/DTPREL dynamic relocations are required. +# IE32-REL: .rela.dyn { +# IE32-REL-NEXT: 0x30228 R_LARCH_TLS_DTPMOD32 b 0x0 +# IE32-REL-NEXT: 0x3022C R_LARCH_TLS_DTPREL32 b 0x0 +# IE32-REL-NEXT: } +# IE32-GOT: section '.got': +# IE32-GOT-NEXT: 0x00030220 01000000 08000000 00000000 00000000 + +# IE64-REL: .rela.dyn { +# IE64-REL-NEXT: 0x30388 R_LARCH_TLS_DTPMOD64 b 0x0 +# IE64-REL-NEXT: 0x30390 R_LARCH_TLS_DTPREL64 b 0x0 +# IE64-REL-NEXT: } +# IE64-GOT: section '.got': +# IE64-GOT-NEXT: 0x00030378 01000000 00000000 08000000 00000000 +# IE64-GOT-NEXT: 0x00030388 00000000 00000000 00000000 00000000 + +#--- a.s +la.tls.gd $a0, a +bl %plt(__tls_get_addr) + +la.tls.gd $a0, b +bl %plt(__tls_get_addr) + +.section .tbss,"awT",@nobits +.globl a +.zero 8 +a: +.zero 4 + +#--- bc.s +.section .tbss,"awT",@nobits +.globl b, c +b: +.zero 4 +c: + +#--- tga.s +.globl __tls_get_addr +__tls_get_addr: diff --git a/lld/test/ELF/loongarch-tls-ie.s b/lld/test/ELF/loongarch-tls-ie.s new file mode 100644 index 0000000000000000000000000000000000000000..78c207991b4e6117438efe021e7acea43b19b9fe --- /dev/null +++ b/lld/test/ELF/loongarch-tls-ie.s @@ -0,0 +1,114 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o +# RUN: llvm-mc --filetype=obj 
--triple=loongarch64 %t/64.s -o %t/64.o + +## LA32 IE +# RUN: ld.lld -shared %t/32.o -o %t/32.so +# RUN: llvm-readobj -r -d %t/32.so | FileCheck --check-prefix=IE32-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/32.so | FileCheck --check-prefixes=IE32 %s + +## LA32 IE -> LE +# RUN: ld.lld %t/32.o -o %t/32 +# RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s + +## LA64 IE +# RUN: ld.lld -shared %t/64.o -o %t/64.so +# RUN: llvm-readobj -r -d %t/64.so | FileCheck --check-prefix=IE64-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/64.so | FileCheck --check-prefixes=IE64 %s + +## LA64 IE -> LE +# RUN: ld.lld %t/64.o -o %t/64 +# RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s + +# IE32-REL: FLAGS STATIC_TLS +# IE32-REL: .rela.dyn { +# IE32-REL-NEXT: 0x20218 R_LARCH_TLS_TPREL32 - 0xC +# IE32-REL-NEXT: 0x20214 R_LARCH_TLS_TPREL32 a 0x0 +# IE32-REL-NEXT: } + +# IE64-REL: FLAGS STATIC_TLS +# IE64-REL: .rela.dyn { +# IE64-REL-NEXT: 0x20370 R_LARCH_TLS_TPREL64 - 0xC +# IE64-REL-NEXT: 0x20368 R_LARCH_TLS_TPREL64 a 0x0 +# IE64-REL-NEXT: } + +## LA32: +## &.got[0] - . = 0x20214 - 0x101a4: 0x10 pages, page offset 0x214 +## &.got[1] - . = 0x20218 - 0x101b0: 0x10 pages, page offset 0x218 +# IE32: 101a4: pcalau12i $a4, 16 +# IE32-NEXT: ld.w $a4, $a4, 532 +# IE32-NEXT: add.w $a4, $a4, $tp +# IE32-NEXT: 101b0: pcalau12i $a5, 16 +# IE32-NEXT: ld.w $a5, $a5, 536 +# IE32-NEXT: add.w $a5, $a5, $tp + +## LA64: +## &.got[0] - . = 0x20368 - 0x102a0: 0x10 pages, page offset 0x368 +## &.got[1] - . 
= 0x20370 - 0x102ac: 0x10 pages, page offset 0x370 +# IE64: 102a0: pcalau12i $a4, 16 +# IE64-NEXT: ld.d $a4, $a4, 872 +# IE64-NEXT: add.d $a4, $a4, $tp +# IE64-NEXT: 102ac: pcalau12i $a5, 16 +# IE64-NEXT: ld.d $a5, $a5, 880 +# IE64-NEXT: add.d $a5, $a5, $tp + +# NOREL: no relocations + +# a@tprel = st_value(a) = 0x8 +# b@tprel = st_value(a) = 0xc +# LE32-GOT: section '.got': +# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000 +# LE64-GOT: section '.got': +# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000 + +## LA32: +## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c +## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130 +# LE32: 20114: pcalau12i $a4, 16 +# LE32-NEXT: ld.w $a4, $a4, 300 +# LE32-NEXT: add.w $a4, $a4, $tp +# LE32-NEXT: 20120: pcalau12i $a5, 16 +# LE32-NEXT: ld.w $a5, $a5, 304 +# LE32-NEXT: add.w $a5, $a5, $tp + +## LA64: +## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0 +## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8 +# LE64: 201c8: pcalau12i $a4, 16 +# LE64-NEXT: ld.d $a4, $a4, 480 +# LE64-NEXT: add.d $a4, $a4, $tp +# LE64-NEXT: 201d4: pcalau12i $a5, 16 +# LE64-NEXT: ld.d $a5, $a5, 488 +# LE64-NEXT: add.d $a5, $a5, $tp + +#--- 32.s +la.tls.ie $a4, a +add.w $a4, $a4, $tp +la.tls.ie $a5, b +add.w $a5, $a5, $tp + +.section .tbss,"awT",@nobits +.globl a +.zero 8 +a: +.zero 4 +b: + +#--- 64.s +la.tls.ie $a4, a +add.d $a4, $a4, $tp +la.tls.ie $a5, b +add.d $a5, $a5, $tp + +.section .tbss,"awT",@nobits +.globl a +.zero 8 +a: +.zero 4 +b: diff --git a/lld/test/ELF/loongarch-tls-ld.s b/lld/test/ELF/loongarch-tls-ld.s new file mode 100644 index 0000000000000000000000000000000000000000..a5be3ad905b764411d3c179d8bc1b7a018c0327b --- /dev/null +++ b/lld/test/ELF/loongarch-tls-ld.s @@ -0,0 +1,89 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t + +## LoongArch psABI doesn't specify TLS relaxation. 
Though the code sequences are not +## relaxed, dynamic relocations can be omitted for LD->LE relaxation. + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent %t/a.s -o %t/a.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent %t/a.s -o %t/a.64.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o + +## LA32 LD +# RUN: ld.lld -shared %t/a.32.o -o %t/ld.32.so +# RUN: llvm-readobj -r %t/ld.32.so | FileCheck --check-prefix=LD32-REL %s +# RUN: llvm-readelf -x .got %t/ld.32.so | FileCheck --check-prefix=LD32-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.so | FileCheck --check-prefixes=LD32 %s + +## LA32 LD -> LE +# RUN: ld.lld %t/a.32.o %t/tga.32.o -o %t/le.32 +# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/le.32 | FileCheck --check-prefixes=LE32 %s + +## LA64 LD +# RUN: ld.lld -shared %t/a.64.o -o %t/ld.64.so +# RUN: llvm-readobj -r %t/ld.64.so | FileCheck --check-prefix=LD64-REL %s +# RUN: llvm-readelf -x .got %t/ld.64.so | FileCheck --check-prefix=LD64-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.64.so | FileCheck --check-prefixes=LD64 %s + +## LA64 LD -> LE +# RUN: ld.lld %t/a.64.o %t/tga.64.o -o %t/le.64 +# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s +# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/le.64 | FileCheck --check-prefixes=LE64 %s + +## a@dtprel = st_value(a) = 0 is a link-time constant. 
+# LD32-REL: .rela.dyn { +# LD32-REL-NEXT: 0x20280 R_LARCH_TLS_DTPMOD32 - 0x0 +# LD32-REL-NEXT: } +# LD32-GOT: section '.got': +# LD32-GOT-NEXT: 0x00020280 00000000 00000000 + +# LD64-REL: .rela.dyn { +# LD64-REL-NEXT: 0x20400 R_LARCH_TLS_DTPMOD64 - 0x0 +# LD64-REL-NEXT: } +# LD64-GOT: section '.got': +# LD64-GOT-NEXT: 0x00020400 00000000 00000000 00000000 00000000 + +## LA32: &DTPMOD(a) - . = 0x20280 - 0x101cc: 0x10 pages, page offset 0x280 +# LD32: 101cc: pcalau12i $a0, 16 +# LD32-NEXT: addi.w $a0, $a0, 640 +# LD32-NEXT: bl 44 + +## LA64: &DTPMOD(a) - . = 0x20400 - 0x102e0: 0x10 pages, page offset 0x400 +# LD64: 102e0: pcalau12i $a0, 16 +# LD64-NEXT: addi.d $a0, $a0, 1024 +# LD64-NEXT: bl 40 + +# NOREL: no relocations + +## a is local - its DTPMOD/DTPREL slots are link-time constants. +## a@dtpmod = 1 (main module) +# LE32-GOT: section '.got': +# LE32-GOT-NEXT: 0x00030120 01000000 00000000 + +# LE64-GOT: section '.got': +# LE64-GOT-NEXT: 0x000301d8 01000000 00000000 00000000 00000000 + +## LA32: DTPMOD(.LANCHOR0) - . = 0x30120 - 0x20114: 0x10 pages, page offset 0x120 +# LE32: 20114: pcalau12i $a0, 16 +# LE32-NEXT: addi.w $a0, $a0, 288 +# LE32-NEXT: bl 4 + +## LA64: DTPMOD(.LANCHOR0) - . = 0x301d8 - 0x201c8: 0x10 pages, page offset 0x1d8 +# LE64: 201c8: pcalau12i $a0, 16 +# LE64-NEXT: addi.d $a0, $a0, 472 +# LE64-NEXT: bl 4 + +#--- a.s +la.tls.ld $a0, .LANCHOR0 +bl %plt(__tls_get_addr) + +.section .tbss,"awT",@nobits +.set .LANCHOR0, . 
+ 0 +.zero 8 + +#--- tga.s +.globl __tls_get_addr +__tls_get_addr: diff --git a/lld/test/ELF/loongarch-tls-le.s b/lld/test/ELF/loongarch-tls-le.s new file mode 100644 index 0000000000000000000000000000000000000000..a20d7d83bae3f221a9a710cf38749901477c75d1 --- /dev/null +++ b/lld/test/ELF/loongarch-tls-le.s @@ -0,0 +1,42 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o + +# RUN: ld.lld %t.32.o -o %t.32 +# RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s + +# RUN: ld.lld %t.64.o -o %t.64 +# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s + +# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: + +# ERR: error: relocation R_LARCH_TLS_LE_HI20 against .LANCHOR0 cannot be used with -shared +# ERR: error: relocation R_LARCH_TLS_LE_LO12 against .LANCHOR0 cannot be used with -shared +# ERR: error: relocation R_LARCH_TLS_LE_HI20 against a cannot be used with -shared +# ERR: error: relocation R_LARCH_TLS_LE_LO12 against a cannot be used with -shared + +# NM: {{0*}}00000008 b .LANCHOR0 +# NM: {{0*}}00000800 B a + +## .LANCHOR0@tprel = 8 +## a@tprel = 0x800 +# LE: lu12i.w $a0, 0 +# LE-NEXT: ori $a0, $a0, 8 +# LE-NEXT: lu12i.w $a1, 0 +# LE-NEXT: ori $a1, $a1, 2048 +# LE-EMPTY: + +.text +_start: +la.tls.le $a0, .LANCHOR0 +la.tls.le $a1, a + +.section .tbss,"awT",@nobits +.space 8 +.LANCHOR0: +.space 0x800-8 +.globl a +a: +.zero 4 diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 96a1d652573fec95f0caf8569a7f0e020ffb315b..1b1f1e781240b719012fa9cfc10d2df8b8eb20e9 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -68,6 +68,7 @@ llvm_config.feature_config( 'ARM': 'arm', 'AVR': 'avr', 'Hexagon': 'hexagon', + 'LoongArch': 'loongarch', 'Mips': 'mips', 'MSP430': 'msp430', 
'PowerPC': 'ppc', diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h index f67acedf11c2efb45e28fb35b5cb4a8141e59a02..8bb176fa85d3d24888402b19be8aa9ec0860b038 100644 --- a/lldb/include/lldb/Utility/ArchSpec.h +++ b/lldb/include/lldb/Utility/ArchSpec.h @@ -109,6 +109,12 @@ public: eRISCVSubType_riscv64, }; + enum LoongArchSubType { + eLoongArchSubType_unknown, + eLoongArchSubType_loongarch32, + eLoongArchSubType_loongarch64, + }; + enum Core { eCore_arm_generic, eCore_arm_armv4, @@ -205,6 +211,9 @@ public: eCore_riscv32, eCore_riscv64, + eCore_loongarch32, + eCore_loongarch64, + eCore_uknownMach32, eCore_uknownMach64, diff --git a/lldb/source/Host/common/HostInfoBase.cpp b/lldb/source/Host/common/HostInfoBase.cpp index 22c0403006e9da0360d4b567a5cfd461a096fddb..e06b627a67b13bbb980a4ece74c5945b98def8c8 100644 --- a/lldb/source/Host/common/HostInfoBase.cpp +++ b/lldb/source/Host/common/HostInfoBase.cpp @@ -339,6 +339,7 @@ void HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, case llvm::Triple::ppc64: case llvm::Triple::ppc64le: case llvm::Triple::x86_64: + case llvm::Triple::loongarch64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; diff --git a/lldb/source/Host/common/NativeProcessProtocol.cpp b/lldb/source/Host/common/NativeProcessProtocol.cpp index be521a31cb3778bdf517e89f786a38650617a631..69b04ee55ba297550c928e7f6546f72e89c42ac6 100644 --- a/lldb/source/Host/common/NativeProcessProtocol.cpp +++ b/lldb/source/Host/common/NativeProcessProtocol.cpp @@ -505,6 +505,8 @@ NativeProcessProtocol::GetSoftwareBreakpointTrapOpcode(size_t size_hint) { static const uint8_t g_s390x_opcode[] = {0x00, 0x01}; static const uint8_t g_ppc_opcode[] = {0x7f, 0xe0, 0x00, 0x08}; // trap static const uint8_t g_ppcle_opcode[] = {0x08, 0x00, 0xe0, 0x7f}; // trap + static const uint8_t g_loongarch_opcode[] = {0x05, 0x00, 0x2a, + 0x00}; // break 0x5 switch (GetArchitecture().GetMachine()) { case 
llvm::Triple::aarch64: @@ -533,6 +535,10 @@ NativeProcessProtocol::GetSoftwareBreakpointTrapOpcode(size_t size_hint) { case llvm::Triple::ppc64le: return llvm::makeArrayRef(g_ppcle_opcode); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return llvm::makeArrayRef(g_loongarch_opcode); + default: return llvm::createStringError(llvm::inconvertibleErrorCode(), "CPU type not supported!"); @@ -557,6 +563,8 @@ size_t NativeProcessProtocol::GetSoftwareBreakpointPCOffset() { case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: // On these architectures the PC doesn't get updated for breakpoint hits. return 0; diff --git a/lldb/source/Plugins/Instruction/CMakeLists.txt b/lldb/source/Plugins/Instruction/CMakeLists.txt index 89771e8f46d144f299a880da67427cf7a440ebf4..19973dd553ac62a1edb02c402db7b8f8a4928f79 100644 --- a/lldb/source/Plugins/Instruction/CMakeLists.txt +++ b/lldb/source/Plugins/Instruction/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(ARM) add_subdirectory(ARM64) +add_subdirectory(LoongArch) add_subdirectory(MIPS) add_subdirectory(MIPS64) add_subdirectory(PPC64) diff --git a/lldb/source/Plugins/Instruction/LoongArch/CMakeLists.txt b/lldb/source/Plugins/Instruction/LoongArch/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..59802ee8fa9ad1f076b7aed38df69717d77fef2a --- /dev/null +++ b/lldb/source/Plugins/Instruction/LoongArch/CMakeLists.txt @@ -0,0 +1,11 @@ +add_lldb_library(lldbPluginInstructionLoongArch PLUGIN + EmulateInstructionLoongArch.cpp + + LINK_LIBS + lldbCore + lldbInterpreter + lldbPluginProcessUtility + lldbSymbol + LINK_COMPONENTS + Support + ) diff --git a/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.cpp b/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..f99d560e76943b31fa1b43bfff651e2f0b2bdf4c --- /dev/null +++ b/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.cpp @@ -0,0 +1,545 @@ +//===---EmulateInstructionLoongArch.cpp------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "EmulateInstructionLoongArch.h" +#include "Plugins/Process/Utility/InstructionUtils.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h" +#include "Plugins/Process/Utility/lldb-loongarch-register-enums.h" +#include "lldb/Core/Address.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/OptionValueArray.h" +#include "lldb/Interpreter/OptionValueDictionary.h" +#include "lldb/Symbol/UnwindPlan.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/RegisterValue.h" +#include "lldb/Utility/Stream.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionLoongArch, InstructionLoongArch) + +namespace lldb_private { + +EmulateInstructionLoongArch::Opcode * +EmulateInstructionLoongArch::GetOpcodeForInstruction(uint32_t inst) { + // TODO: Add the mask for other instruction. 
+ static EmulateInstructionLoongArch::Opcode g_opcodes[] = { + {0xfc000000, 0x40000000, &EmulateInstructionLoongArch::EmulateBEQZ, + "beqz rj, offs21"}, + {0xfc000000, 0x44000000, &EmulateInstructionLoongArch::EmulateBNEZ, + "bnez rj, offs21"}, + {0xfc000300, 0x48000000, &EmulateInstructionLoongArch::EmulateBCEQZ, + "bceqz cj, offs21"}, + {0xfc000300, 0x48000100, &EmulateInstructionLoongArch::EmulateBCNEZ, + "bcnez cj, offs21"}, + {0xfc000000, 0x4c000000, &EmulateInstructionLoongArch::EmulateJIRL, + "jirl rd, rj, offs16"}, + {0xfc000000, 0x50000000, &EmulateInstructionLoongArch::EmulateB, + " b offs26"}, + {0xfc000000, 0x54000000, &EmulateInstructionLoongArch::EmulateBL, + "bl offs26"}, + {0xfc000000, 0x58000000, &EmulateInstructionLoongArch::EmulateBEQ, + "beq rj, rd, offs16"}, + {0xfc000000, 0x5c000000, &EmulateInstructionLoongArch::EmulateBNE, + "bne rj, rd, offs16"}, + {0xfc000000, 0x60000000, &EmulateInstructionLoongArch::EmulateBLT, + "blt rj, rd, offs16"}, + {0xfc000000, 0x64000000, &EmulateInstructionLoongArch::EmulateBGE, + "bge rj, rd, offs16"}, + {0xfc000000, 0x68000000, &EmulateInstructionLoongArch::EmulateBLTU, + "bltu rj, rd, offs16"}, + {0xfc000000, 0x6c000000, &EmulateInstructionLoongArch::EmulateBGEU, + "bgeu rj, rd, offs16"}, + {0x00000000, 0x00000000, &EmulateInstructionLoongArch::EmulateNonJMP, + "NonJMP"}}; + static const size_t num_loongarch_opcodes = llvm::array_lengthof(g_opcodes); + + for (size_t i = 0; i < num_loongarch_opcodes; ++i) + if ((g_opcodes[i].mask & inst) == g_opcodes[i].value) + return &g_opcodes[i]; + return nullptr; +} + +bool EmulateInstructionLoongArch::TestExecute(uint32_t inst) { + Opcode *opcode_data = GetOpcodeForInstruction(inst); + if (!opcode_data) + return false; + // Call the Emulate... function. 
+ if (!(this->*opcode_data->callback)(inst)) + return false; + return true; +} + +bool EmulateInstructionLoongArch::EvaluateInstruction(uint32_t options) { + uint32_t inst_size = m_opcode.GetByteSize(); + uint32_t inst = m_opcode.GetOpcode32(); + bool increase_pc = options & eEmulateInstructionOptionAutoAdvancePC; + bool success = false; + + Opcode *opcode_data = GetOpcodeForInstruction(inst); + if (!opcode_data) + return false; + + lldb::addr_t old_pc = 0; + if (increase_pc) { + old_pc = ReadPC(&success); + if (!success) + return false; + } + + // Call the Emulate... function. + if (!(this->*opcode_data->callback)(inst)) + return false; + + if (increase_pc) { + lldb::addr_t new_pc = ReadPC(&success); + if (!success) + return false; + + if (new_pc == old_pc && !WritePC(old_pc + inst_size)) + return false; + } + return true; +} + +bool EmulateInstructionLoongArch::ReadInstruction() { + bool success = false; + m_addr = ReadPC(&success); + if (!success) { + m_addr = LLDB_INVALID_ADDRESS; + return false; + } + + Context ctx; + ctx.type = eContextReadOpcode; + ctx.SetNoArgs(); + uint32_t inst = (uint32_t)ReadMemoryUnsigned(ctx, m_addr, 4, 0, &success); + m_opcode.SetOpcode32(inst, GetByteOrder()); + + return true; +} + +lldb::addr_t EmulateInstructionLoongArch::ReadPC(bool *success) { + return ReadRegisterUnsigned(eRegisterKindGeneric, LLDB_REGNUM_GENERIC_PC, + LLDB_INVALID_ADDRESS, success); +} + +bool EmulateInstructionLoongArch::WritePC(lldb::addr_t pc) { + EmulateInstruction::Context ctx; + ctx.type = eContextAdvancePC; + ctx.SetNoArgs(); + return WriteRegisterUnsigned(ctx, eRegisterKindGeneric, + LLDB_REGNUM_GENERIC_PC, pc); +} + +bool EmulateInstructionLoongArch::GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_index, + RegisterInfo ®_info) { + if (reg_kind == eRegisterKindGeneric) { + switch (reg_index) { + case LLDB_REGNUM_GENERIC_PC: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_pc_loongarch; + break; + case LLDB_REGNUM_GENERIC_SP: + reg_kind = 
eRegisterKindLLDB; + reg_index = gpr_sp_loongarch; + break; + case LLDB_REGNUM_GENERIC_FP: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_fp_loongarch; + break; + case LLDB_REGNUM_GENERIC_RA: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_ra_loongarch; + break; + // We may handle LLDB_REGNUM_GENERIC_ARGx when more instructions are + // supported. + default: + llvm_unreachable("unsupported register"); + } + } + + const RegisterInfo *array = + RegisterInfoPOSIX_loongarch64::GetRegisterInfoPtr(m_arch); + const uint32_t length = + RegisterInfoPOSIX_loongarch64::GetRegisterInfoCount(m_arch); + + if (reg_index >= length || reg_kind != eRegisterKindLLDB) + return false; + + reg_info = array[reg_index]; + return true; +} + +bool EmulateInstructionLoongArch::SetTargetTriple(const ArchSpec &arch) { + return SupportsThisArch(arch); +} + +bool EmulateInstructionLoongArch::TestEmulation( + Stream *out_stream, ArchSpec &arch, OptionValueDictionary *test_data) { + return false; +} + +void EmulateInstructionLoongArch::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance); +} + +void EmulateInstructionLoongArch::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +lldb_private::EmulateInstruction * +EmulateInstructionLoongArch::CreateInstance(const ArchSpec &arch, + InstructionType inst_type) { + if (EmulateInstructionLoongArch::SupportsThisInstructionType(inst_type) && + SupportsThisArch(arch)) + return new EmulateInstructionLoongArch(arch); + return nullptr; +} + +bool EmulateInstructionLoongArch::SupportsThisArch(const ArchSpec &arch) { + return arch.GetTriple().isLoongArch(); +} + +bool EmulateInstructionLoongArch::EmulateBEQZ(uint32_t inst) { + return IsLoongArch64() ? EmulateBEQZ64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBNEZ(uint32_t inst) { + return IsLoongArch64() ? 
EmulateBNEZ64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBCEQZ(uint32_t inst) { + return IsLoongArch64() ? EmulateBCEQZ64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBCNEZ(uint32_t inst) { + return IsLoongArch64() ? EmulateBCNEZ64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateJIRL(uint32_t inst) { + return IsLoongArch64() ? EmulateJIRL64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateB(uint32_t inst) { + return IsLoongArch64() ? EmulateB64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBL(uint32_t inst) { + return IsLoongArch64() ? EmulateBL64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBEQ(uint32_t inst) { + return IsLoongArch64() ? EmulateBEQ64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBNE(uint32_t inst) { + return IsLoongArch64() ? EmulateBNE64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBLT(uint32_t inst) { + return IsLoongArch64() ? EmulateBLT64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBGE(uint32_t inst) { + return IsLoongArch64() ? EmulateBGE64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBLTU(uint32_t inst) { + return IsLoongArch64() ? EmulateBLTU64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateBGEU(uint32_t inst) { + return IsLoongArch64() ? 
EmulateBGEU64(inst) : false; +} + +bool EmulateInstructionLoongArch::EmulateNonJMP(uint32_t inst) { return false; } + +// beqz rj, offs21 +// if GR[rj] == 0: +// PC = PC + SignExtend({offs21, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBEQZ64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint32_t offs21 = Bits32(inst, 25, 10) + (Bits32(inst, 4, 0) << 16); + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + if (rj_val == 0) { + uint64_t next_pc = pc + llvm::SignExtend64<23>(offs21 << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bnez rj, offs21 +// if GR[rj] != 0: +// PC = PC + SignExtend({offs21, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBNEZ64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint32_t offs21 = Bits32(inst, 25, 10) + (Bits32(inst, 4, 0) << 16); + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + if (rj_val != 0) { + uint64_t next_pc = pc + llvm::SignExtend64<23>(offs21 << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bceqz cj, offs21 +// if CFR[cj] == 0: +// PC = PC + SignExtend({offs21, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBCEQZ64(uint32_t inst) { + bool success = false; + uint32_t cj = Bits32(inst, 7, 5) + fpr_fcc0_loongarch; + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint32_t offs21 = Bits32(inst, 25, 10) + (Bits32(inst, 4, 0) << 16); + uint8_t cj_val = + (uint8_t)ReadRegisterUnsigned(eRegisterKindLLDB, cj, 0, &success); + if (!success) + return false; + if (cj_val == 0) { + uint64_t next_pc = pc + llvm::SignExtend64<23>(offs21 << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); + return false; 
+} + +// bcnez cj, offs21 +// if CFR[cj] != 0: +// PC = PC + SignExtend({offs21, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBCNEZ64(uint32_t inst) { + bool success = false; + uint32_t cj = Bits32(inst, 7, 5) + fpr_fcc0_loongarch; + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint32_t offs21 = Bits32(inst, 25, 10) + (Bits32(inst, 4, 0) << 16); + uint8_t cj_val = + (uint8_t)ReadRegisterUnsigned(eRegisterKindLLDB, cj, 0, &success); + if (!success) + return false; + if (cj_val != 0) { + uint64_t next_pc = pc + llvm::SignExtend64<23>(offs21 << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); + return false; +} + +// jirl rd, rj, offs16 +// GR[rd] = PC + 4 +// PC = GR[rj] + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateJIRL64(uint32_t inst) { + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + bool success = false; + uint64_t pc = ReadPC(&success); + if (!success) + return false; + EmulateInstruction::Context ctx; + if (!WriteRegisterUnsigned(ctx, eRegisterKindLLDB, rd, pc + 4)) + return false; + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + uint64_t next_pc = rj_val + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); +} + +// b offs26 +// PC = PC + SignExtend({offs26, 2' b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateB64(uint32_t inst) { + bool success = false; + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint32_t offs26 = Bits32(inst, 25, 10) + (Bits32(inst, 9, 0) << 16); + uint64_t next_pc = pc + llvm::SignExtend64<28>(offs26 << 2); + return WritePC(next_pc); +} + +// bl offs26 +// GR[1] = PC + 4 +// PC = PC + SignExtend({offs26, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBL64(uint32_t inst) { + bool success = false; + uint64_t pc = ReadPC(&success); + if (!success) + return false; + EmulateInstruction::Context ctx; + if 
(!WriteRegisterUnsigned(ctx, eRegisterKindLLDB, gpr_r1_loongarch, pc + 4)) + return false; + uint32_t offs26 = Bits32(inst, 25, 10) + (Bits32(inst, 9, 0) << 16); + uint64_t next_pc = pc + llvm::SignExtend64<28>(offs26 << 2); + return WritePC(next_pc); +} + +// beq rj, rd, offs16 +// if GR[rj] == GR[rd]: +// PC = PC + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBEQ64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + uint64_t rd_val = ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val == rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bne rj, rd, offs16 +// if GR[rj] != GR[rd]: +// PC = PC + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBNE64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + uint64_t rd_val = ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val != rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// blt rj, rd, offs16 +// if signed(GR[rj]) < signed(GR[rd]): +// PC = PC + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBLT64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if 
(!success) + return false; + int64_t rj_val = + (int64_t)ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + int64_t rd_val = + (int64_t)ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val < rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bge rj, rd, offs16 +// if signed(GR[rj]) >= signed(GR[rd]): +// PC = PC + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBGE64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + int64_t rj_val = + (int64_t)ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + int64_t rd_val = + (int64_t)ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val >= rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bltu rj, rd, offs16 +// if unsigned(GR[rj]) < unsigned(GR[rd]): +// PC = PC + SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBLTU64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + uint64_t rd_val = ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val < rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +// bgeu rj, rd, offs16 +// if unsigned(GR[rj]) >= unsigned(GR[rd]): +// PC = PC 
+ SignExtend({offs16, 2'b0}, GRLEN) +bool EmulateInstructionLoongArch::EmulateBGEU64(uint32_t inst) { + bool success = false; + uint32_t rj = Bits32(inst, 9, 5); + uint32_t rd = Bits32(inst, 4, 0); + uint64_t pc = ReadPC(&success); + if (!success) + return false; + uint64_t rj_val = ReadRegisterUnsigned(eRegisterKindLLDB, rj, 0, &success); + if (!success) + return false; + uint64_t rd_val = ReadRegisterUnsigned(eRegisterKindLLDB, rd, 0, &success); + if (!success) + return false; + if (rj_val >= rd_val) { + uint64_t next_pc = pc + llvm::SignExtend64<18>(Bits32(inst, 25, 10) << 2); + return WritePC(next_pc); + } else + return WritePC(pc + 4); +} + +} // namespace lldb_private diff --git a/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.h b/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.h new file mode 100644 index 0000000000000000000000000000000000000000..735de468dab94905be64c40c455eff7a93ee0599 --- /dev/null +++ b/lldb/source/Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.h @@ -0,0 +1,108 @@ +//===---EmulateInstructionLoongArch.h--------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_INSTRUCTION_LOONGARCH_EMULATEINSTRUCTIONLOONGARCH_H +#define LLDB_SOURCE_PLUGINS_INSTRUCTION_LOONGARCH_EMULATEINSTRUCTIONLOONGARCH_H + +#include "lldb/Core/EmulateInstruction.h" +#include "lldb/Interpreter/OptionValue.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/Status.h" + +namespace lldb_private { + +class EmulateInstructionLoongArch : public EmulateInstruction { +public: + static llvm::StringRef GetPluginNameStatic() { return "LoongArch"; } + + static llvm::StringRef GetPluginDescriptionStatic() { + return "Emulate instructions for the LoongArch architecture."; + } + + static bool SupportsThisInstructionType(InstructionType inst_type) { + return inst_type == eInstructionTypePCModifying; + } + + static bool SupportsThisArch(const ArchSpec &arch); + + static lldb_private::EmulateInstruction * + CreateInstance(const lldb_private::ArchSpec &arch, InstructionType inst_type); + + static void Initialize(); + + static void Terminate(); + +public: + EmulateInstructionLoongArch(const ArchSpec &arch) : EmulateInstruction(arch) { + m_arch_subtype = arch.GetMachine(); + } + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + bool SupportsEmulatingInstructionsOfType(InstructionType inst_type) override { + return SupportsThisInstructionType(inst_type); + } + + bool SetTargetTriple(const ArchSpec &arch) override; + bool ReadInstruction() override; + bool EvaluateInstruction(uint32_t options) override; + bool TestEmulation(Stream *out_stream, ArchSpec &arch, + OptionValueDictionary *test_data) override; + + bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, + lldb_private::RegisterInfo ®_info) override; + + lldb::addr_t ReadPC(bool *success); + bool WritePC(lldb::addr_t pc); + bool IsLoongArch64() { return m_arch_subtype == 
llvm::Triple::loongarch64; } + bool TestExecute(uint32_t inst); + +private: + struct Opcode { + uint32_t mask; + uint32_t value; + bool (EmulateInstructionLoongArch::*callback)(uint32_t opcode); + const char *name; + }; + + llvm::Triple::ArchType m_arch_subtype; + Opcode *GetOpcodeForInstruction(uint32_t inst); + + bool EmulateBEQZ(uint32_t inst); + bool EmulateBNEZ(uint32_t inst); + bool EmulateBCEQZ(uint32_t inst); + bool EmulateBCNEZ(uint32_t inst); + bool EmulateJIRL(uint32_t inst); + bool EmulateB(uint32_t inst); + bool EmulateBL(uint32_t inst); + bool EmulateBEQ(uint32_t inst); + bool EmulateBNE(uint32_t inst); + bool EmulateBLT(uint32_t inst); + bool EmulateBGE(uint32_t inst); + bool EmulateBLTU(uint32_t inst); + bool EmulateBGEU(uint32_t inst); + bool EmulateNonJMP(uint32_t inst); + + bool EmulateBEQZ64(uint32_t inst); + bool EmulateBNEZ64(uint32_t inst); + bool EmulateBCEQZ64(uint32_t inst); + bool EmulateBCNEZ64(uint32_t inst); + bool EmulateJIRL64(uint32_t inst); + bool EmulateB64(uint32_t inst); + bool EmulateBL64(uint32_t inst); + bool EmulateBEQ64(uint32_t inst); + bool EmulateBNE64(uint32_t inst); + bool EmulateBLT64(uint32_t inst); + bool EmulateBGE64(uint32_t inst); + bool EmulateBLTU64(uint32_t inst); + bool EmulateBGEU64(uint32_t inst); +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_INSTRUCTION_LOONGARCH_EMULATEINSTRUCTIONLOONGARCH_H diff --git a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp index abda0cd0e9a3840f265a51af4dc6636692de4489..a6e385f70709bf8c71d479590929bc9f598dbf3f 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp @@ -211,6 +211,9 @@ unsigned ELFHeader::GetRelocationJumpSlotType() const { case EM_RISCV: slot = R_RISCV_JUMP_SLOT; break; + case EM_LOONGARCH: + slot = R_LARCH_JUMP_SLOT; + break; } return slot; diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp 
b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 122298d87bf8dbd11c027375712dd9748ef6c944..f81b6e78cb8e25fddcdad06e5dab1d9c416d4270 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -318,6 +318,18 @@ static uint32_t ppc64VariantFromElfFlags(const elf::ELFHeader &header) { return ArchSpec::eCore_ppc64_generic; } +static uint32_t loongarchVariantFromElfFlags(const elf::ELFHeader &header) { + uint32_t fileclass = header.e_ident[EI_CLASS]; + switch (fileclass) { + case llvm::ELF::ELFCLASS32: + return ArchSpec::eLoongArchSubType_loongarch32; + case llvm::ELF::ELFCLASS64: + return ArchSpec::eLoongArchSubType_loongarch64; + default: + return ArchSpec::eLoongArchSubType_unknown; + } +} + static uint32_t subTypeFromElfHeader(const elf::ELFHeader &header) { if (header.e_machine == llvm::ELF::EM_MIPS) return mipsVariantFromElfFlags(header); @@ -325,6 +337,8 @@ static uint32_t subTypeFromElfHeader(const elf::ELFHeader &header) { return ppc64VariantFromElfFlags(header); else if (header.e_machine == llvm::ELF::EM_RISCV) return riscvVariantFromElfFlags(header); + else if (header.e_machine == llvm::ELF::EM_LOONGARCH) + return loongarchVariantFromElfFlags(header); return LLDB_INVALID_CPUTYPE; } @@ -2593,6 +2607,50 @@ ObjectFileELF::ParseTrampolineSymbols(Symtab *symbol_table, user_id_t start_id, rel_data, symtab_data, strtab_data); } +static void ApplyELF64ABS64Relocation(Symtab *symtab, ELFRelocation &rel, + DataExtractor &debug_data, + Section *rel_section) { + Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel)); + if (symbol) { + addr_t value = symbol->GetAddressRef().GetFileAddress(); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. 
+ WritableDataBuffer *data_buffer = + llvm::cast(data_buffer_sp.get()); + uint64_t *dst = reinterpret_cast( + data_buffer->GetBytes() + rel_section->GetFileOffset() + + ELFRelocation::RelocOffset64(rel)); + uint64_t val_offset = value + ELFRelocation::RelocAddend64(rel); + memcpy(dst, &val_offset, sizeof(uint64_t)); + } +} + +static void ApplyELF64ABS32Relocation(Symtab *symtab, ELFRelocation &rel, + DataExtractor &debug_data, + Section *rel_section, bool is_signed) { + Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel)); + if (symbol) { + addr_t value = symbol->GetAddressRef().GetFileAddress(); + value += ELFRelocation::RelocAddend32(rel); + if ((!is_signed && (value > UINT32_MAX)) || + (is_signed && + ((int64_t)value > INT32_MAX || (int64_t)value < INT32_MIN))) { + Log *log = GetLog(LLDBLog::Modules); + LLDB_LOGF(log, "Failed to apply debug info relocations"); + return; + } + uint32_t truncated_addr = (value & 0xFFFFFFFF); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. 
+ WritableDataBuffer *data_buffer = + llvm::cast(data_buffer_sp.get()); + uint32_t *dst = reinterpret_cast( + data_buffer->GetBytes() + rel_section->GetFileOffset() + + ELFRelocation::RelocOffset32(rel)); + memcpy(dst, &truncated_addr, sizeof(uint32_t)); + } +} + unsigned ObjectFileELF::ApplyRelocations( Symtab *symtab, const ELFHeader *hdr, const ELFSectionHeader *rel_hdr, const ELFSectionHeader *symtab_hdr, const ELFSectionHeader *debug_hdr, @@ -2635,55 +2693,50 @@ unsigned ObjectFileELF::ApplyRelocations( assert(false && "unexpected relocation type"); } } else { - switch (reloc_type(rel)) { - case R_AARCH64_ABS64: - case R_X86_64_64: { - symbol = symtab->FindSymbolByID(reloc_symbol(rel)); - if (symbol) { - addr_t value = symbol->GetAddressRef().GetFileAddress(); - DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); - // ObjectFileELF creates a WritableDataBuffer in CreateInstance. - WritableDataBuffer *data_buffer = - llvm::cast(data_buffer_sp.get()); - uint64_t *dst = reinterpret_cast( - data_buffer->GetBytes() + rel_section->GetFileOffset() + - ELFRelocation::RelocOffset64(rel)); - uint64_t val_offset = value + ELFRelocation::RelocAddend64(rel); - memcpy(dst, &val_offset, sizeof(uint64_t)); + switch (hdr->e_machine) { + case llvm::ELF::EM_AARCH64: + switch (reloc_type(rel)) { + case R_AARCH64_ABS64: + ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_AARCH64_ABS32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + default: + assert(false && "unexpected relocation type"); } break; - } - case R_X86_64_32: - case R_X86_64_32S: - case R_AARCH64_ABS32: { - symbol = symtab->FindSymbolByID(reloc_symbol(rel)); - if (symbol) { - addr_t value = symbol->GetAddressRef().GetFileAddress(); - value += ELFRelocation::RelocAddend32(rel); - if ((reloc_type(rel) == R_X86_64_32 && (value > UINT32_MAX)) || - (reloc_type(rel) == R_X86_64_32S && - ((int64_t)value > INT32_MAX && (int64_t)value < 
INT32_MIN)) || - (reloc_type(rel) == R_AARCH64_ABS32 && - ((int64_t)value > INT32_MAX && (int64_t)value < INT32_MIN))) { - Log *log = GetLog(LLDBLog::Modules); - LLDB_LOGF(log, "Failed to apply debug info relocations"); - break; - } - uint32_t truncated_addr = (value & 0xFFFFFFFF); - DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); - // ObjectFileELF creates a WritableDataBuffer in CreateInstance. - WritableDataBuffer *data_buffer = - llvm::cast(data_buffer_sp.get()); - uint32_t *dst = reinterpret_cast( - data_buffer->GetBytes() + rel_section->GetFileOffset() + - ELFRelocation::RelocOffset32(rel)); - memcpy(dst, &truncated_addr, sizeof(uint32_t)); + case llvm::ELF::EM_LOONGARCH: + switch (reloc_type(rel)) { + case R_LARCH_64: + ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_LARCH_32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + default: + assert(false && "unexpected relocation type"); + } + break; + case llvm::ELF::EM_X86_64: + switch (reloc_type(rel)) { + case R_X86_64_64: + ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_X86_64_32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, + false); + break; + case R_X86_64_32S: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + case R_X86_64_PC32: + default: + assert(false && "unexpected relocation type"); } break; - } - case R_X86_64_PC32: default: - assert(false && "unexpected relocation type"); + assert(false && "unsupported machine"); } } } diff --git a/lldb/source/Plugins/Process/Linux/CMakeLists.txt b/lldb/source/Plugins/Process/Linux/CMakeLists.txt index 36d5037b048622b01ce3ae2531597ef4597b39f0..7e7c777ec89217216f533788f1dde92de456ddd5 100644 --- a/lldb/source/Plugins/Process/Linux/CMakeLists.txt +++ b/lldb/source/Plugins/Process/Linux/CMakeLists.txt @@ -8,6 +8,7 @@ add_lldb_library(lldbPluginProcessLinux NativeRegisterContextLinux.cpp 
NativeRegisterContextLinux_arm.cpp NativeRegisterContextLinux_arm64.cpp + NativeRegisterContextLinux_loongarch64.cpp NativeRegisterContextLinux_ppc64le.cpp NativeRegisterContextLinux_s390x.cpp NativeRegisterContextLinux_x86_64.cpp diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index abee8dbebe759e8de254c5ab37368c24b72d0b84..a1f588dc5e9f39a07b6ac39540da49605180038f 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -889,7 +889,8 @@ bool NativeProcessLinux::MonitorClone(NativeThreadLinux &parent, } bool NativeProcessLinux::SupportHardwareSingleStepping() const { - if (m_arch.GetMachine() == llvm::Triple::arm || m_arch.IsMIPS()) + if (m_arch.GetMachine() == llvm::Triple::arm || m_arch.IsMIPS() || + m_arch.GetTriple().isLoongArch()) return false; return true; } diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35be4216021a885fad19be2a32903e8079bfd879 --- /dev/null +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.cpp @@ -0,0 +1,333 @@ +//===-- NativeRegisterContextLinux_loongarch64.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__loongarch__) && __loongarch_grlen == 64 + +#include "NativeRegisterContextLinux_loongarch64.h" + +#include "lldb/Host/HostInfo.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RegisterValue.h" +#include "lldb/Utility/Status.h" + +#include "Plugins/Process/Linux/NativeProcessLinux.h" +#include "Plugins/Process/Linux/Procfs.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h" +#include "Plugins/Process/Utility/lldb-loongarch-register-enums.h" + +// NT_PRSTATUS and NT_FPREGSET definition +#include +// struct iovec definition +#include + +#define REG_CONTEXT_SIZE (GetGPRSize() + GetFPRSize()) + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::process_linux; + +std::unique_ptr +NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux( + const ArchSpec &target_arch, NativeThreadLinux &native_thread) { + switch (target_arch.GetMachine()) { + case llvm::Triple::loongarch64: { + Flags opt_regsets; + auto register_info_up = std::make_unique( + target_arch, opt_regsets); + return std::make_unique( + target_arch, native_thread, std::move(register_info_up)); + } + default: + llvm_unreachable("have no register context for architecture"); + } +} + +NativeRegisterContextLinux_loongarch64::NativeRegisterContextLinux_loongarch64( + const ArchSpec &target_arch, NativeThreadProtocol &native_thread, + std::unique_ptr register_info_up) + : NativeRegisterContextRegisterInfo(native_thread, + register_info_up.release()), + NativeRegisterContextLinux(native_thread) { + ::memset(&m_fpr, 0, sizeof(m_fpr)); + ::memset(&m_gpr, 0, sizeof(m_gpr)); + + m_gpr_is_valid = false; + m_fpu_is_valid = false; +} + +const RegisterInfoPOSIX_loongarch64 & +NativeRegisterContextLinux_loongarch64::GetRegisterInfo() const { + return 
static_cast( + NativeRegisterContextRegisterInfo::GetRegisterInfoInterface()); +} + +uint32_t NativeRegisterContextLinux_loongarch64::GetRegisterSetCount() const { + return GetRegisterInfo().GetRegisterSetCount(); +} + +const RegisterSet *NativeRegisterContextLinux_loongarch64::GetRegisterSet( + uint32_t set_index) const { + return GetRegisterInfo().GetRegisterSet(set_index); +} + +uint32_t NativeRegisterContextLinux_loongarch64::GetUserRegisterCount() const { + uint32_t count = 0; + for (uint32_t set_index = 0; set_index < GetRegisterSetCount(); ++set_index) + count += GetRegisterSet(set_index)->num_registers; + return count; +} + +Status NativeRegisterContextLinux_loongarch64::ReadRegister( + const RegisterInfo *reg_info, RegisterValue ®_value) { + Status error; + + if (!reg_info) { + error.SetErrorString("reg_info NULL"); + return error; + } + + const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; + + if (reg == LLDB_INVALID_REGNUM) + return Status("no lldb regnum for %s", reg_info && reg_info->name + ? 
reg_info->name + : ""); + + uint8_t *src = nullptr; + uint32_t offset = LLDB_INVALID_INDEX32; + + if (IsGPR(reg)) { + error = ReadGPR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset; + assert(offset < GetGPRSize()); + src = (uint8_t *)GetGPRBuffer() + offset; + + } else if (IsFPR(reg)) { + error = ReadFPR(); + if (error.Fail()) + return error; + + offset = CalculateFprOffset(reg_info); + assert(offset < GetFPRSize()); + src = (uint8_t *)GetFPRBuffer() + offset; + } else + return Status("failed - register wasn't recognized to be a GPR or an FPR, " + "write strategy unknown"); + + reg_value.SetFromMemoryData(reg_info, src, reg_info->byte_size, + eByteOrderLittle, error); + + return error; +} + +Status NativeRegisterContextLinux_loongarch64::WriteRegister( + const RegisterInfo *reg_info, const RegisterValue ®_value) { + Status error; + + if (!reg_info) + return Status("reg_info NULL"); + + const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; + + if (reg == LLDB_INVALID_REGNUM) + return Status("no lldb regnum for %s", reg_info->name != nullptr + ? 
reg_info->name + : ""); + + uint8_t *dst = nullptr; + uint32_t offset = LLDB_INVALID_INDEX32; + + if (IsGPR(reg)) { + error = ReadGPR(); + if (error.Fail()) + return error; + + assert(reg_info->byte_offset < GetGPRSize()); + dst = (uint8_t *)GetGPRBuffer() + reg_info->byte_offset; + ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); + + return WriteGPR(); + } else if (IsFPR(reg)) { + error = ReadFPR(); + if (error.Fail()) + return error; + + offset = CalculateFprOffset(reg_info); + assert(offset < GetFPRSize()); + dst = (uint8_t *)GetFPRBuffer() + offset; + ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); + + return WriteFPR(); + } + + return Status("Failed to write register value"); +} + +Status NativeRegisterContextLinux_loongarch64::ReadAllRegisterValues( + lldb::WritableDataBufferSP &data_sp) { + Status error; + + data_sp.reset(new DataBufferHeap(REG_CONTEXT_SIZE, 0)); + + error = ReadGPR(); + if (error.Fail()) + return error; + + error = ReadFPR(); + if (error.Fail()) + return error; + + uint8_t *dst = data_sp->GetBytes(); + ::memcpy(dst, GetGPRBuffer(), GetGPRSize()); + dst += GetGPRSize(); + ::memcpy(dst, GetFPRBuffer(), GetFPRSize()); + + return error; +} + +Status NativeRegisterContextLinux_loongarch64::WriteAllRegisterValues( + const lldb::DataBufferSP &data_sp) { + Status error; + + if (!data_sp) { + error.SetErrorStringWithFormat( + "NativeRegisterContextLinux_loongarch64::%s invalid data_sp provided", + __FUNCTION__); + return error; + } + + if (data_sp->GetByteSize() != REG_CONTEXT_SIZE) { + error.SetErrorStringWithFormat( + "NativeRegisterContextLinux_loongarch64::%s data_sp contained " + "mismatched data size, expected %" PRIu64 ", actual %" PRIu64, + __FUNCTION__, REG_CONTEXT_SIZE, data_sp->GetByteSize()); + return error; + } + + const uint8_t *src = data_sp->GetBytes(); + if (src == nullptr) { + error.SetErrorStringWithFormat("NativeRegisterContextLinux_loongarch64::%s " + "DataBuffer::GetBytes() returned a null " + "pointer", + 
__FUNCTION__); + return error; + } + ::memcpy(GetGPRBuffer(), src, GetRegisterInfoInterface().GetGPRSize()); + + error = WriteGPR(); + if (error.Fail()) + return error; + + src += GetRegisterInfoInterface().GetGPRSize(); + ::memcpy(GetFPRBuffer(), src, GetFPRSize()); + + error = WriteFPR(); + if (error.Fail()) + return error; + + return error; +} + +bool NativeRegisterContextLinux_loongarch64::IsGPR(unsigned reg) const { + return GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_loongarch64::GPRegSet; +} + +bool NativeRegisterContextLinux_loongarch64::IsFPR(unsigned reg) const { + return GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_loongarch64::FPRegSet; +} + +Status NativeRegisterContextLinux_loongarch64::ReadGPR() { + Status error; + + if (m_gpr_is_valid) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetGPRBuffer(); + ioVec.iov_len = GetGPRSize(); + + error = ReadRegisterSet(&ioVec, GetGPRSize(), NT_PRSTATUS); + + if (error.Success()) + m_gpr_is_valid = true; + + return error; +} + +Status NativeRegisterContextLinux_loongarch64::WriteGPR() { + Status error = ReadGPR(); + if (error.Fail()) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetGPRBuffer(); + ioVec.iov_len = GetGPRSize(); + + m_gpr_is_valid = false; + + return WriteRegisterSet(&ioVec, GetGPRSize(), NT_PRSTATUS); +} + +Status NativeRegisterContextLinux_loongarch64::ReadFPR() { + Status error; + + if (m_fpu_is_valid) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPRBuffer(); + ioVec.iov_len = GetFPRSize(); + + error = ReadRegisterSet(&ioVec, GetFPRSize(), NT_FPREGSET); + + if (error.Success()) + m_fpu_is_valid = true; + + return error; +} + +Status NativeRegisterContextLinux_loongarch64::WriteFPR() { + Status error = ReadFPR(); + if (error.Fail()) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPRBuffer(); + ioVec.iov_len = GetFPRSize(); + + m_fpu_is_valid = false; + + return 
WriteRegisterSet(&ioVec, GetFPRSize(), NT_FPREGSET); +} + +void NativeRegisterContextLinux_loongarch64::InvalidateAllRegisters() { + m_gpr_is_valid = false; + m_fpu_is_valid = false; +} + +uint32_t NativeRegisterContextLinux_loongarch64::CalculateFprOffset( + const RegisterInfo *reg_info) const { + return reg_info->byte_offset - GetGPRSize(); +} + +std::vector +NativeRegisterContextLinux_loongarch64::GetExpeditedRegisters( + ExpeditedRegs expType) const { + std::vector expedited_reg_nums = + NativeRegisterContext::GetExpeditedRegisters(expType); + + return expedited_reg_nums; +} + +#endif // defined(__loongarch__) && __loongarch_grlen == 64 diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..0a6084ff4206dbc2c2aa1dc2df24847fc7aa590c --- /dev/null +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_loongarch64.h @@ -0,0 +1,93 @@ +//===-- NativeRegisterContextLinux_loongarch64.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__loongarch__) && __loongarch_grlen == 64 + +#ifndef lldb_NativeRegisterContextLinux_loongarch64_h +#define lldb_NativeRegisterContextLinux_loongarch64_h + +#include "Plugins/Process/Linux/NativeRegisterContextLinux.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h" + +#include + +namespace lldb_private { +namespace process_linux { + +class NativeProcessLinux; + +class NativeRegisterContextLinux_loongarch64 + : public NativeRegisterContextLinux { +public: + NativeRegisterContextLinux_loongarch64( + const ArchSpec &target_arch, NativeThreadProtocol &native_thread, + std::unique_ptr register_info_up); + + uint32_t GetRegisterSetCount() const override; + + uint32_t GetUserRegisterCount() const override; + + const RegisterSet *GetRegisterSet(uint32_t set_index) const override; + + Status ReadRegister(const RegisterInfo *reg_info, + RegisterValue ®_value) override; + + Status WriteRegister(const RegisterInfo *reg_info, + const RegisterValue ®_value) override; + + Status ReadAllRegisterValues(lldb::WritableDataBufferSP &data_sp) override; + + Status WriteAllRegisterValues(const lldb::DataBufferSP &data_sp) override; + + void InvalidateAllRegisters() override; + + std::vector + GetExpeditedRegisters(ExpeditedRegs expType) const override; + + bool RegisterOffsetIsDynamic() const override { return true; } + +protected: + Status ReadGPR() override; + + Status WriteGPR() override; + + Status ReadFPR() override; + + Status WriteFPR() override; + + void *GetGPRBuffer() override { return &m_gpr; } + + void *GetFPRBuffer() override { return &m_fpr; } + + size_t GetGPRSize() const override { return GetRegisterInfo().GetGPRSize(); } + + size_t GetFPRSize() override { return GetRegisterInfo().GetFPRSize(); } + +private: + bool m_gpr_is_valid; + bool m_fpu_is_valid; + + RegisterInfoPOSIX_loongarch64::GPR 
m_gpr; + + RegisterInfoPOSIX_loongarch64::FPR m_fpr; + + bool IsGPR(unsigned reg) const; + + bool IsFPR(unsigned reg) const; + + uint32_t CalculateFprOffset(const RegisterInfo *reg_info) const; + + const RegisterInfoPOSIX_loongarch64 &GetRegisterInfo() const; +}; + +} // namespace process_linux +} // namespace lldb_private + +#endif // #ifndef lldb_NativeRegisterContextLinux_loongarch64_h + +#endif // defined(__loongarch__) && __loongarch_grlen == 64 diff --git a/lldb/source/Plugins/Process/Utility/CMakeLists.txt b/lldb/source/Plugins/Process/Utility/CMakeLists.txt index 2a06af008dcec00dd8304b02d2d9d83f6290b252..acc8eb615edb1fa02f7fd8b660ffc51124567182 100644 --- a/lldb/source/Plugins/Process/Utility/CMakeLists.txt +++ b/lldb/source/Plugins/Process/Utility/CMakeLists.txt @@ -38,6 +38,7 @@ add_lldb_library(lldbPluginProcessUtility RegisterContextOpenBSD_x86_64.cpp RegisterContextPOSIX_arm.cpp RegisterContextPOSIX_arm64.cpp + RegisterContextPOSIX_loongarch64.cpp RegisterContextPOSIX_mips64.cpp RegisterContextPOSIX_powerpc.cpp RegisterContextPOSIX_ppc64le.cpp @@ -48,6 +49,7 @@ add_lldb_library(lldbPluginProcessUtility RegisterContextWindows_x86_64.cpp RegisterInfoPOSIX_arm.cpp RegisterInfoPOSIX_arm64.cpp + RegisterInfoPOSIX_loongarch64.cpp RegisterInfoPOSIX_ppc64le.cpp StopInfoMachException.cpp ThreadMemory.cpp diff --git a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp index ee5295bf65651a6c66ea55de7d2ac41e78c24961..15e94b0924f1ce30fe49ef9bb9d63819bd368b49 100644 --- a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp +++ b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp @@ -165,7 +165,8 @@ Status NativeProcessSoftwareSingleStep::SetupSoftwareSingleStepping( // Arm mode size_hint = 4; } - } else if (arch.IsMIPS() || arch.GetTriple().isPPC64()) + } else if (arch.IsMIPS() || arch.GetTriple().isPPC64() || + 
arch.GetTriple().isLoongArch()) size_hint = 4; error = process.SetBreakpoint(next_pc, size_hint, /*hardware=*/false); diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a48a58f28f7aadf4733352dc77a9dabfa1045fd5 --- /dev/null +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.cpp @@ -0,0 +1,82 @@ +//===-- RegisterContextPOSIX_loongarch64.cpp --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Target/Process.h" +#include "lldb/Target/Target.h" +#include "lldb/Target/Thread.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/Endian.h" +#include "lldb/Utility/RegisterValue.h" +#include "lldb/Utility/Scalar.h" +#include "llvm/Support/Compiler.h" + +#include "RegisterContextPOSIX_loongarch64.h" + +using namespace lldb; +using namespace lldb_private; + +RegisterContextPOSIX_loongarch64::RegisterContextPOSIX_loongarch64( + lldb_private::Thread &thread, + std::unique_ptr register_info) + : lldb_private::RegisterContext(thread, 0), + m_register_info_up(std::move(register_info)) {} + +RegisterContextPOSIX_loongarch64::~RegisterContextPOSIX_loongarch64() = default; + +void RegisterContextPOSIX_loongarch64::invalidate() {} + +void RegisterContextPOSIX_loongarch64::InvalidateAllRegisters() {} + +size_t RegisterContextPOSIX_loongarch64::GetRegisterCount() { + return m_register_info_up->GetRegisterCount(); +} + +size_t RegisterContextPOSIX_loongarch64::GetGPRSize() { + return m_register_info_up->GetGPRSize(); +} + +unsigned 
RegisterContextPOSIX_loongarch64::GetRegisterSize(unsigned int reg) { + return m_register_info_up->GetRegisterInfo()[reg].byte_size; +} + +unsigned RegisterContextPOSIX_loongarch64::GetRegisterOffset(unsigned int reg) { + return m_register_info_up->GetRegisterInfo()[reg].byte_offset; +} + +const lldb_private::RegisterInfo * +RegisterContextPOSIX_loongarch64::GetRegisterInfoAtIndex(size_t reg) { + if (reg < GetRegisterCount()) + return &GetRegisterInfo()[reg]; + + return nullptr; +} + +size_t RegisterContextPOSIX_loongarch64::GetRegisterSetCount() { + return m_register_info_up->GetRegisterCount(); +} + +const lldb_private::RegisterSet * +RegisterContextPOSIX_loongarch64::GetRegisterSet(size_t set) { + return m_register_info_up->GetRegisterSet(set); +} + +const lldb_private::RegisterInfo * +RegisterContextPOSIX_loongarch64::GetRegisterInfo() { + return m_register_info_up->GetRegisterInfo(); +} + +bool RegisterContextPOSIX_loongarch64::IsGPR(unsigned int reg) { + return m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_loongarch64::GPRegSet; +} + +bool RegisterContextPOSIX_loongarch64::IsFPR(unsigned int reg) { + return m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_loongarch64::FPRegSet; +} diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..95f93bb41f015dddd8fc85c7bbd08c80ba2ffc6a --- /dev/null +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_loongarch64.h @@ -0,0 +1,63 @@ +//===-- RegisterContextPOSIX_loongarch64.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTPOSIX_LOONGARCH64_H +#define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTPOSIX_LOONGARCH64_H + +#include "RegisterInfoInterface.h" +#include "RegisterInfoPOSIX_loongarch64.h" +#include "lldb-loongarch-register-enums.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Utility/Log.h" + +class RegisterContextPOSIX_loongarch64 : public lldb_private::RegisterContext { +public: + RegisterContextPOSIX_loongarch64( + lldb_private::Thread &thread, + std::unique_ptr register_info); + + ~RegisterContextPOSIX_loongarch64() override; + + void invalidate(); + + void InvalidateAllRegisters() override; + + size_t GetRegisterCount() override; + + virtual size_t GetGPRSize(); + + virtual unsigned GetRegisterSize(unsigned reg); + + virtual unsigned GetRegisterOffset(unsigned reg); + + const lldb_private::RegisterInfo *GetRegisterInfoAtIndex(size_t reg) override; + + size_t GetRegisterSetCount() override; + + const lldb_private::RegisterSet *GetRegisterSet(size_t set) override; + +protected: + std::unique_ptr m_register_info_up; + + virtual const lldb_private::RegisterInfo *GetRegisterInfo(); + + bool IsGPR(unsigned reg); + + bool IsFPR(unsigned reg); + + size_t GetFPRSize() { return sizeof(RegisterInfoPOSIX_loongarch64::FPR); } + + uint32_t GetRegNumFCSR() const { return fpr_fcsr_loongarch; } + + virtual bool ReadGPR() = 0; + virtual bool ReadFPR() = 0; + virtual bool WriteGPR() = 0; + virtual bool WriteFPR() = 0; +}; + +#endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTPOSIX_LOONGARCH64_H diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c723afe4b69482375244839eb560202b83a9943 --- 
/dev/null +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.cpp @@ -0,0 +1,158 @@ +//===-- RegisterInfoPOSIX_loongarch64.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include +#include +#include + +#include "lldb/lldb-defines.h" +#include "llvm/Support/Compiler.h" + +#include "RegisterInfoPOSIX_loongarch64.h" + +#define GPR_OFFSET(idx) ((idx)*8 + 0) +#define FPR_OFFSET(idx) ((idx)*8 + sizeof(RegisterInfoPOSIX_loongarch64::GPR)) +#define FCC_OFFSET(idx) ((idx)*1 + 32 * 8 + sizeof(RegisterInfoPOSIX_loongarch64::GPR)) +#define FCSR_OFFSET (8 * 1 + 32 * 8 + sizeof(RegisterInfoPOSIX_loongarch64::GPR)) + +#define REG_CONTEXT_SIZE \ + (sizeof(RegisterInfoPOSIX_loongarch64::GPR) + \ + sizeof(RegisterInfoPOSIX_loongarch64::FPR)) + +#define DECLARE_REGISTER_INFOS_LOONGARCH64_STRUCT +#include "RegisterInfos_loongarch64.h" +#undef DECLARE_REGISTER_INFOS_LOONGARCH64_STRUCT + +const lldb_private::RegisterInfo * +RegisterInfoPOSIX_loongarch64::GetRegisterInfoPtr( + const lldb_private::ArchSpec &target_arch) { + switch (target_arch.GetMachine()) { + case llvm::Triple::loongarch64: + return g_register_infos_loongarch64; + default: + assert(false && "Unhandled target architecture."); + return nullptr; + } +} + +uint32_t RegisterInfoPOSIX_loongarch64::GetRegisterInfoCount( + const lldb_private::ArchSpec &target_arch) { + switch (target_arch.GetMachine()) { + case llvm::Triple::loongarch64: + return static_cast(sizeof(g_register_infos_loongarch64) / + sizeof(g_register_infos_loongarch64[0])); + default: + assert(false && "Unhandled target architecture."); + return 0; + } +} + +// Number of register sets provided by this context. 
+enum { + k_num_gpr_registers = gpr_last_loongarch - gpr_first_loongarch + 1, + k_num_fpr_registers = fpr_last_loongarch - fpr_first_loongarch + 1, + k_num_register_sets = 2 +}; + +// LoongArch64 general purpose registers. +static const uint32_t g_gpr_regnums_loongarch64[] = { + gpr_r0_loongarch, gpr_r1_loongarch, gpr_r2_loongarch, + gpr_r3_loongarch, gpr_r4_loongarch, gpr_r5_loongarch, + gpr_r6_loongarch, gpr_r7_loongarch, gpr_r8_loongarch, + gpr_r9_loongarch, gpr_r10_loongarch, gpr_r11_loongarch, + gpr_r12_loongarch, gpr_r13_loongarch, gpr_r14_loongarch, + gpr_r15_loongarch, gpr_r16_loongarch, gpr_r17_loongarch, + gpr_r18_loongarch, gpr_r19_loongarch, gpr_r20_loongarch, + gpr_r21_loongarch, gpr_r22_loongarch, gpr_r23_loongarch, + gpr_r24_loongarch, gpr_r25_loongarch, gpr_r26_loongarch, + gpr_r27_loongarch, gpr_r28_loongarch, gpr_r29_loongarch, + gpr_r30_loongarch, gpr_r31_loongarch, gpr_orig_a0_loongarch, + gpr_pc_loongarch, gpr_badv_loongarch, gpr_reserved0_loongarch, + gpr_reserved1_loongarch, gpr_reserved2_loongarch, gpr_reserved3_loongarch, + gpr_reserved4_loongarch, gpr_reserved5_loongarch, gpr_reserved6_loongarch, + gpr_reserved7_loongarch, gpr_reserved8_loongarch, gpr_reserved9_loongarch, + LLDB_INVALID_REGNUM}; + +static_assert(((sizeof g_gpr_regnums_loongarch64 / + sizeof g_gpr_regnums_loongarch64[0]) - + 1) == k_num_gpr_registers, + "g_gpr_regnums_loongarch64 has wrong number of register infos"); + +// LoongArch64 floating point registers. 
+static const uint32_t g_fpr_regnums_loongarch64[] = { + fpr_f0_loongarch, fpr_f1_loongarch, fpr_f2_loongarch, + fpr_f3_loongarch, fpr_f4_loongarch, fpr_f5_loongarch, + fpr_f6_loongarch, fpr_f7_loongarch, fpr_f8_loongarch, + fpr_f9_loongarch, fpr_f10_loongarch, fpr_f11_loongarch, + fpr_f12_loongarch, fpr_f13_loongarch, fpr_f14_loongarch, + fpr_f15_loongarch, fpr_f16_loongarch, fpr_f17_loongarch, + fpr_f18_loongarch, fpr_f19_loongarch, fpr_f20_loongarch, + fpr_f21_loongarch, fpr_f22_loongarch, fpr_f23_loongarch, + fpr_f24_loongarch, fpr_f25_loongarch, fpr_f26_loongarch, + fpr_f27_loongarch, fpr_f28_loongarch, fpr_f29_loongarch, + fpr_f30_loongarch, fpr_f31_loongarch, fpr_fcc0_loongarch, + fpr_fcc1_loongarch, fpr_fcc2_loongarch, fpr_fcc3_loongarch, + fpr_fcc4_loongarch, fpr_fcc5_loongarch, fpr_fcc6_loongarch, + fpr_fcc7_loongarch, fpr_fcsr_loongarch, LLDB_INVALID_REGNUM}; + +static_assert(((sizeof g_fpr_regnums_loongarch64 / + sizeof g_fpr_regnums_loongarch64[0]) - + 1) == k_num_fpr_registers, + "g_fpr_regnums_loongarch64 has wrong number of register infos"); + +// Register sets for LoongArch64. 
+static const lldb_private::RegisterSet + g_reg_sets_loongarch64[k_num_register_sets] = { + {"General Purpose Registers", "gpr", k_num_gpr_registers, + g_gpr_regnums_loongarch64}, + {"Floating Point Registers", "fpr", k_num_fpr_registers, + g_fpr_regnums_loongarch64}}; + +RegisterInfoPOSIX_loongarch64::RegisterInfoPOSIX_loongarch64( + const lldb_private::ArchSpec &target_arch, lldb_private::Flags flags) + : lldb_private::RegisterInfoAndSetInterface(target_arch), + m_register_info_p(GetRegisterInfoPtr(target_arch)), + m_register_info_count(GetRegisterInfoCount(target_arch)) {} + +uint32_t RegisterInfoPOSIX_loongarch64::GetRegisterCount() const { + return m_register_info_count; +} + +size_t RegisterInfoPOSIX_loongarch64::GetGPRSize() const { + return sizeof(struct RegisterInfoPOSIX_loongarch64::GPR); +} + +size_t RegisterInfoPOSIX_loongarch64::GetFPRSize() const { + return sizeof(struct RegisterInfoPOSIX_loongarch64::FPR); +} + +const lldb_private::RegisterInfo * +RegisterInfoPOSIX_loongarch64::GetRegisterInfo() const { + return m_register_info_p; +} + +size_t RegisterInfoPOSIX_loongarch64::GetRegisterSetCount() const { + return k_num_register_sets; +} + +size_t RegisterInfoPOSIX_loongarch64::GetRegisterSetFromRegisterIndex( + uint32_t reg_index) const { + // coverity[unsigned_compare] + if (reg_index >= gpr_first_loongarch && reg_index <= gpr_last_loongarch) + return GPRegSet; + if (reg_index >= fpr_first_loongarch && reg_index <= fpr_last_loongarch) + return FPRegSet; + return LLDB_INVALID_REGNUM; +} + +const lldb_private::RegisterSet * +RegisterInfoPOSIX_loongarch64::GetRegisterSet(size_t set_index) const { + if (set_index < GetRegisterSetCount()) + return &g_reg_sets_loongarch64[set_index]; + return nullptr; +} diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h new file mode 100644 index 
0000000000000000000000000000000000000000..a3338acbbc97bd1d35f2f18b435b513dfbbd5472 --- /dev/null +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h @@ -0,0 +1,69 @@ +//===-- RegisterInfoPOSIX_loongarch64.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_LOONGARCH64_H +#define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_LOONGARCH64_H + +#include "RegisterInfoAndSetInterface.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/lldb-private.h" +#include + +class RegisterInfoPOSIX_loongarch64 + : public lldb_private::RegisterInfoAndSetInterface { +public: + static const lldb_private::RegisterInfo * + GetRegisterInfoPtr(const lldb_private::ArchSpec &target_arch); + static uint32_t + GetRegisterInfoCount(const lldb_private::ArchSpec &target_arch); + +public: + enum RegSetKind { + GPRegSet, + FPRegSet, + }; + + struct GPR { + uint64_t gpr[32]; + + uint64_t orig_a0; + uint64_t csr_era; + uint64_t csr_badv; + uint64_t reserved[10]; + }; + + struct FPR { + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; + }; + + RegisterInfoPOSIX_loongarch64(const lldb_private::ArchSpec &target_arch, + lldb_private::Flags flags); + + size_t GetGPRSize() const override; + + size_t GetFPRSize() const override; + + const lldb_private::RegisterInfo *GetRegisterInfo() const override; + + uint32_t GetRegisterCount() const override; + + const lldb_private::RegisterSet * + GetRegisterSet(size_t reg_set) const override; + + size_t GetRegisterSetCount() const override; + + size_t GetRegisterSetFromRegisterIndex(uint32_t reg_index) const override; + +private: + const lldb_private::RegisterInfo *m_register_info_p; 
+ uint32_t m_register_info_count; +}; + +#endif diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_loongarch64.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_loongarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..27f2bac22dd51dbfc23d6874efb541ad7946d271 --- /dev/null +++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_loongarch64.h @@ -0,0 +1,171 @@ +//===-- RegisterInfos_loongarch64.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef DECLARE_REGISTER_INFOS_LOONGARCH64_STRUCT + +#include + +#include "lldb/lldb-defines.h" +#include "lldb/lldb-enumerations.h" +#include "lldb/lldb-private.h" + +#include "Utility/LoongArch_DWARF_Registers.h" +#include "lldb-loongarch-register-enums.h" + +#ifndef GPR_OFFSET +#error GPR_OFFSET must be defined before including this header file +#endif + +#ifndef FPR_OFFSET +#error FPR_OFFSET must be defined before including this header file +#endif + +using namespace loongarch_dwarf; + +// clang-format off + +// I suppose EHFrame and DWARF are the same. 
+#define KIND_HELPER(reg, generic_kind) \ + { \ + loongarch_dwarf::dwarf_##reg, loongarch_dwarf::dwarf_##reg, generic_kind, \ + LLDB_INVALID_REGNUM, reg##_loongarch \ + } + +// Generates register kinds array for generic purpose registers +#define GPR64_KIND(reg, generic_kind) KIND_HELPER(reg, generic_kind) + +// Generates register kinds array for floating point registers +#define FPR64_KIND(reg, generic_kind) KIND_HELPER(reg, generic_kind) + +// Defines a 64-bit general purpose register +#define DEFINE_GPR64(reg, generic_kind) DEFINE_GPR64_ALT(reg, reg, generic_kind) +#define DEFINE_GPR64_ALT(reg, alt, generic_kind) \ + { \ + #reg, #alt, 8, GPR_OFFSET(gpr_##reg##_loongarch - gpr_first_loongarch), \ + lldb::eEncodingUint, lldb::eFormatHex, \ + GPR64_KIND(gpr_##reg, generic_kind), nullptr, nullptr \ + } + +// Defines a 64-bit floating point register +#define DEFINE_FPR64(reg, generic_kind) DEFINE_FPR64_ALT(reg, reg, generic_kind) +#define DEFINE_FPR64_ALT(reg, alt, generic_kind) \ + { \ + #reg, #alt, 8, FPR_OFFSET(fpr_##reg##_loongarch - fpr_first_loongarch), \ + lldb::eEncodingUint, lldb::eFormatHex, \ + FPR64_KIND(fpr_##reg, generic_kind), nullptr, nullptr \ + } + +#define DEFINE_FCC(reg, generic_kind) \ + { \ + #reg, nullptr, 1, FCC_OFFSET(fpr_##reg##_loongarch - fpr_fcc0_loongarch), \ + lldb::eEncodingUint, lldb::eFormatHex, \ + FPR64_KIND(fpr_##reg, generic_kind), nullptr, nullptr \ + } + +#define DEFINE_FCSR(reg, generic_kind) \ + { \ + #reg, nullptr, 4, FCSR_OFFSET, \ + lldb::eEncodingUint, lldb::eFormatHex, \ + FPR64_KIND(fpr_##reg, generic_kind), nullptr, nullptr \ + } + +// clang-format on + +static lldb_private::RegisterInfo g_register_infos_loongarch64[] = { + DEFINE_GPR64_ALT(r0, zero, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r1, ra, LLDB_REGNUM_GENERIC_RA), + DEFINE_GPR64_ALT(r2, tp, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r3, sp, LLDB_REGNUM_GENERIC_SP), + DEFINE_GPR64_ALT(r4, a0, LLDB_REGNUM_GENERIC_ARG1), + DEFINE_GPR64_ALT(r5, a1, 
LLDB_REGNUM_GENERIC_ARG2), + DEFINE_GPR64_ALT(r6, a2, LLDB_REGNUM_GENERIC_ARG3), + DEFINE_GPR64_ALT(r7, a3, LLDB_REGNUM_GENERIC_ARG4), + DEFINE_GPR64_ALT(r8, a4, LLDB_REGNUM_GENERIC_ARG5), + DEFINE_GPR64_ALT(r9, a5, LLDB_REGNUM_GENERIC_ARG6), + DEFINE_GPR64_ALT(r10, a6, LLDB_REGNUM_GENERIC_ARG7), + DEFINE_GPR64_ALT(r11, a7, LLDB_REGNUM_GENERIC_ARG8), + DEFINE_GPR64_ALT(r12, t0, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r13, t1, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r14, t2, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r15, t3, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r16, t4, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r17, t5, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r18, t6, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r19, t7, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r20, t8, LLDB_INVALID_REGNUM), + DEFINE_GPR64(r21, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r22, fp, LLDB_REGNUM_GENERIC_FP), + DEFINE_GPR64_ALT(r23, s0, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r24, s1, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r25, s2, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r26, s3, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r27, s4, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r28, s5, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r29, s6, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r30, s7, LLDB_INVALID_REGNUM), + DEFINE_GPR64_ALT(r31, s8, LLDB_INVALID_REGNUM), + + DEFINE_GPR64(orig_a0, LLDB_INVALID_REGNUM), + DEFINE_GPR64(pc, LLDB_REGNUM_GENERIC_PC), + DEFINE_GPR64(badv, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved0, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved1, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved2, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved3, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved4, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved5, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved6, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved7, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved8, LLDB_INVALID_REGNUM), + DEFINE_GPR64(reserved9, LLDB_INVALID_REGNUM), + + DEFINE_FPR64_ALT(f0, fa0, 
LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f1, fa1, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f2, fa2, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f3, fa3, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f4, fa4, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f5, fa5, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f6, fa6, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f7, fa7, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f8, ft0, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f9, ft1, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f10, ft2, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f11, ft3, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f12, ft4, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f13, ft5, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f14, ft6, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f15, ft7, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f16, ft8, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f17, ft9, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f18, ft10, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f19, ft11, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f20, ft12, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f21, ft13, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f22, ft14, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f23, ft15, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f24, fs0, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f25, fs1, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f26, fs2, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f27, fs3, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f28, fs4, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f29, fs5, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f30, fs6, LLDB_INVALID_REGNUM), + DEFINE_FPR64_ALT(f31, fs7, LLDB_INVALID_REGNUM), + + DEFINE_FCC(fcc0, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc1, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc2, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc3, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc4, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc5, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc6, LLDB_INVALID_REGNUM), + DEFINE_FCC(fcc7, LLDB_INVALID_REGNUM), + DEFINE_FCSR(fcsr, LLDB_INVALID_REGNUM), +}; + +#endif // 
DECLARE_REGISTER_INFOS_LOONGARCH64_STRUCT diff --git a/lldb/source/Plugins/Process/Utility/lldb-loongarch-register-enums.h b/lldb/source/Plugins/Process/Utility/lldb-loongarch-register-enums.h new file mode 100644 index 0000000000000000000000000000000000000000..f55c807f86c00efab402ed14d13a824f39d034a2 --- /dev/null +++ b/lldb/source/Plugins/Process/Utility/lldb-loongarch-register-enums.h @@ -0,0 +1,178 @@ +//===-- lldb-loongarch-register-enums.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_LLDB_LOONGARCH_REGISTER_ENUMS_H +#define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_LLDB_LOONGARCH_REGISTER_ENUMS_H + +// LLDB register codes (e.g. RegisterKind == eRegisterKindLLDB) + +// Internal codes for all loongarch registers. 
+enum { + // The same order as user_regs_struct in + // note: these enum values are used as byte_offset + gpr_first_loongarch = 0, + gpr_r0_loongarch = gpr_first_loongarch, + gpr_r1_loongarch, + gpr_r2_loongarch, + gpr_r3_loongarch, + gpr_r4_loongarch, + gpr_r5_loongarch, + gpr_r6_loongarch, + gpr_r7_loongarch, + gpr_r8_loongarch, + gpr_r9_loongarch, + gpr_r10_loongarch, + gpr_r11_loongarch, + gpr_r12_loongarch, + gpr_r13_loongarch, + gpr_r14_loongarch, + gpr_r15_loongarch, + gpr_r16_loongarch, + gpr_r17_loongarch, + gpr_r18_loongarch, + gpr_r19_loongarch, + gpr_r20_loongarch, + gpr_r21_loongarch, + gpr_r22_loongarch, + gpr_r23_loongarch, + gpr_r24_loongarch, + gpr_r25_loongarch, + gpr_r26_loongarch, + gpr_r27_loongarch, + gpr_r28_loongarch, + gpr_r29_loongarch, + gpr_r30_loongarch, + gpr_r31_loongarch, + gpr_orig_a0_loongarch, + gpr_pc_loongarch, + gpr_badv_loongarch, + gpr_reserved0_loongarch, + gpr_reserved1_loongarch, + gpr_reserved2_loongarch, + gpr_reserved3_loongarch, + gpr_reserved4_loongarch, + gpr_reserved5_loongarch, + gpr_reserved6_loongarch, + gpr_reserved7_loongarch, + gpr_reserved8_loongarch, + gpr_reserved9_loongarch, + gpr_last_loongarch = 44, + + gpr_zero_loongarch = gpr_r0_loongarch, + gpr_ra_loongarch = gpr_r1_loongarch, + gpr_tp_loongarch = gpr_r2_loongarch, + gpr_sp_loongarch = gpr_r3_loongarch, + gpr_a0_loongarch = gpr_r4_loongarch, + gpr_a1_loongarch = gpr_r5_loongarch, + gpr_a2_loongarch = gpr_r6_loongarch, + gpr_a3_loongarch = gpr_r7_loongarch, + gpr_a4_loongarch = gpr_r8_loongarch, + gpr_a5_loongarch = gpr_r9_loongarch, + gpr_a6_loongarch = gpr_r10_loongarch, + gpr_a7_loongarch = gpr_r11_loongarch, + gpr_t0_loongarch = gpr_r12_loongarch, + gpr_t1_loongarch = gpr_r13_loongarch, + gpr_t2_loongarch = gpr_r14_loongarch, + gpr_t3_loongarch = gpr_r15_loongarch, + gpr_t4_loongarch = gpr_r16_loongarch, + gpr_t5_loongarch = gpr_r17_loongarch, + gpr_t6_loongarch = gpr_r18_loongarch, + gpr_t7_loongarch = gpr_r19_loongarch, + gpr_t8_loongarch = 
gpr_r20_loongarch, + gpr_fp_loongarch = gpr_r22_loongarch, + gpr_s0_loongarch = gpr_r23_loongarch, + gpr_s1_loongarch = gpr_r24_loongarch, + gpr_s2_loongarch = gpr_r25_loongarch, + gpr_s3_loongarch = gpr_r26_loongarch, + gpr_s4_loongarch = gpr_r27_loongarch, + gpr_s5_loongarch = gpr_r28_loongarch, + gpr_s6_loongarch = gpr_r29_loongarch, + gpr_s7_loongarch = gpr_r30_loongarch, + gpr_s8_loongarch = gpr_r31_loongarch, + + fpr_first_loongarch = 45, + fpr_f0_loongarch = fpr_first_loongarch, + fpr_f1_loongarch, + fpr_f2_loongarch, + fpr_f3_loongarch, + fpr_f4_loongarch, + fpr_f5_loongarch, + fpr_f6_loongarch, + fpr_f7_loongarch, + fpr_f8_loongarch, + fpr_f9_loongarch, + fpr_f10_loongarch, + fpr_f11_loongarch, + fpr_f12_loongarch, + fpr_f13_loongarch, + fpr_f14_loongarch, + fpr_f15_loongarch, + fpr_f16_loongarch, + fpr_f17_loongarch, + fpr_f18_loongarch, + fpr_f19_loongarch, + fpr_f20_loongarch, + fpr_f21_loongarch, + fpr_f22_loongarch, + fpr_f23_loongarch, + fpr_f24_loongarch, + fpr_f25_loongarch, + fpr_f26_loongarch, + fpr_f27_loongarch, + fpr_f28_loongarch, + fpr_f29_loongarch, + fpr_f30_loongarch, + fpr_f31_loongarch, + fpr_fcc0_loongarch, + fpr_fcc1_loongarch, + fpr_fcc2_loongarch, + fpr_fcc3_loongarch, + fpr_fcc4_loongarch, + fpr_fcc5_loongarch, + fpr_fcc6_loongarch, + fpr_fcc7_loongarch, + fpr_fcsr_loongarch, + fpr_last_loongarch = fpr_fcsr_loongarch, + + fpr_fa0_loongarch = fpr_f0_loongarch, + fpr_fa1_loongarch = fpr_f1_loongarch, + fpr_fa2_loongarch = fpr_f2_loongarch, + fpr_fa3_loongarch = fpr_f3_loongarch, + fpr_fa4_loongarch = fpr_f4_loongarch, + fpr_fa5_loongarch = fpr_f5_loongarch, + fpr_fa6_loongarch = fpr_f6_loongarch, + fpr_fa7_loongarch = fpr_f7_loongarch, + fpr_ft0_loongarch = fpr_f8_loongarch, + fpr_ft1_loongarch = fpr_f9_loongarch, + fpr_ft2_loongarch = fpr_f10_loongarch, + fpr_ft3_loongarch = fpr_f11_loongarch, + fpr_ft4_loongarch = fpr_f12_loongarch, + fpr_ft5_loongarch = fpr_f13_loongarch, + fpr_ft6_loongarch = fpr_f14_loongarch, + 
fpr_ft7_loongarch = fpr_f15_loongarch, + fpr_ft8_loongarch = fpr_f16_loongarch, + fpr_ft9_loongarch = fpr_f17_loongarch, + fpr_ft10_loongarch = fpr_f18_loongarch, + fpr_ft11_loongarch = fpr_f19_loongarch, + fpr_ft12_loongarch = fpr_f20_loongarch, + fpr_ft13_loongarch = fpr_f21_loongarch, + fpr_ft14_loongarch = fpr_f22_loongarch, + fpr_ft15_loongarch = fpr_f23_loongarch, + fpr_fs0_loongarch = fpr_f24_loongarch, + fpr_fs1_loongarch = fpr_f25_loongarch, + fpr_fs2_loongarch = fpr_f26_loongarch, + fpr_fs3_loongarch = fpr_f27_loongarch, + fpr_fs4_loongarch = fpr_f28_loongarch, + fpr_fs5_loongarch = fpr_f29_loongarch, + fpr_fs6_loongarch = fpr_f30_loongarch, + fpr_fs7_loongarch = fpr_f31_loongarch, + + k_num_registers_loongarch +}; + +#endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_LLDB_LOONGARCH_REGISTER_ENUMS_H diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index c914a59ecd8a6b3de7f15d2dd4d7d1665e972701..2516f114e01668de4e43dd32dc0903912d232f1a 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1945,6 +1945,14 @@ size_t Platform::GetSoftwareBreakpointTrapOpcode(Target &target, trap_opcode_size = sizeof(g_i386_opcode); } break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { + static const uint8_t g_loongarch_opcode[] = {0x05, 0x00, 0x2a, + 0x00}; // break 0x5 + trap_opcode = g_loongarch_opcode; + trap_opcode_size = sizeof(g_loongarch_opcode); + } break; + default: return 0; } diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 79c5cd3f18fc0a4a93a072219e26a1faffb87e49..d88faed6817a66c992de5aa8e1078fdc9c3df7b2 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -220,6 +220,11 @@ static const CoreDefinition g_core_definitions[] = { {eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64, "riscv64"}, + {eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32, + ArchSpec::eCore_loongarch32, 
"loongarch32"}, + {eByteOrderLittle, 8, 4, 4, llvm::Triple::loongarch64, + ArchSpec::eCore_loongarch64, "loongarch64"}, + {eByteOrderLittle, 4, 4, 4, llvm::Triple::UnknownArch, ArchSpec::eCore_uknownMach32, "unknown-mach-32"}, {eByteOrderLittle, 8, 4, 4, llvm::Triple::UnknownArch, @@ -406,6 +411,12 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = { ArchSpec::eRISCVSubType_riscv32, 0xFFFFFFFFu, 0xFFFFFFFFu}, // riscv32 {ArchSpec::eCore_riscv64, llvm::ELF::EM_RISCV, ArchSpec::eRISCVSubType_riscv64, 0xFFFFFFFFu, 0xFFFFFFFFu}, // riscv64 + {ArchSpec::eCore_loongarch32, llvm::ELF::EM_LOONGARCH, + ArchSpec::eLoongArchSubType_loongarch32, 0xFFFFFFFFu, + 0xFFFFFFFFu}, // loongarch32 + {ArchSpec::eCore_loongarch64, llvm::ELF::EM_LOONGARCH, + ArchSpec::eLoongArchSubType_loongarch64, 0xFFFFFFFFu, + 0xFFFFFFFFu}, // loongarch64 }; static const ArchDefinition g_elf_arch_def = { diff --git a/lldb/source/Utility/LoongArch_DWARF_Registers.h b/lldb/source/Utility/LoongArch_DWARF_Registers.h new file mode 100644 index 0000000000000000000000000000000000000000..34e40a066051eefa64ad47a8a38ec64695d07c82 --- /dev/null +++ b/lldb/source/Utility/LoongArch_DWARF_Registers.h @@ -0,0 +1,177 @@ +//===-- LoongArch_DWARF_Registers.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_UTILITY_LOONGARCH_DWARF_REGISTERS_H +#define LLDB_SOURCE_UTILITY_LOONGARCH_DWARF_REGISTERS_H + +#include "lldb/lldb-private.h" + +namespace loongarch_dwarf { + +enum { + dwarf_gpr_r0 = 0, + dwarf_gpr_r1, + dwarf_gpr_r2, + dwarf_gpr_r3, + dwarf_gpr_r4, + dwarf_gpr_r5, + dwarf_gpr_r6, + dwarf_gpr_r7, + dwarf_gpr_r8, + dwarf_gpr_r9, + dwarf_gpr_r10, + dwarf_gpr_r11, + dwarf_gpr_r12, + dwarf_gpr_r13, + dwarf_gpr_r14, + dwarf_gpr_r15, + dwarf_gpr_r16, + dwarf_gpr_r17, + dwarf_gpr_r18, + dwarf_gpr_r19, + dwarf_gpr_r20, + dwarf_gpr_r21, + dwarf_gpr_r22, + dwarf_gpr_r23, + dwarf_gpr_r24, + dwarf_gpr_r25, + dwarf_gpr_r26, + dwarf_gpr_r27, + dwarf_gpr_r28, + dwarf_gpr_r29, + dwarf_gpr_r30, + dwarf_gpr_r31 = 31, + + dwarf_gpr_orig_a0, + dwarf_gpr_pc, + dwarf_gpr_badv, + + dwarf_gpr_reserved0 = 35, + dwarf_gpr_reserved1, + dwarf_gpr_reserved2, + dwarf_gpr_reserved3, + dwarf_gpr_reserved4, + dwarf_gpr_reserved5, + dwarf_gpr_reserved6, + dwarf_gpr_reserved7, + dwarf_gpr_reserved8, + dwarf_gpr_reserved9, + + dwarf_fpr_f0 = 45, + dwarf_fpr_f1, + dwarf_fpr_f2, + dwarf_fpr_f3, + dwarf_fpr_f4, + dwarf_fpr_f5, + dwarf_fpr_f6, + dwarf_fpr_f7, + dwarf_fpr_f8, + dwarf_fpr_f9, + dwarf_fpr_f10, + dwarf_fpr_f11, + dwarf_fpr_f12, + dwarf_fpr_f13, + dwarf_fpr_f14, + dwarf_fpr_f15, + dwarf_fpr_f16, + dwarf_fpr_f17, + dwarf_fpr_f18, + dwarf_fpr_f19, + dwarf_fpr_f20, + dwarf_fpr_f21, + dwarf_fpr_f22, + dwarf_fpr_f23, + dwarf_fpr_f24, + dwarf_fpr_f25, + dwarf_fpr_f26, + dwarf_fpr_f27, + dwarf_fpr_f28, + dwarf_fpr_f29, + dwarf_fpr_f30, + dwarf_fpr_f31 = 76, + + dwarf_fpr_fcc0, + dwarf_fpr_fcc1, + dwarf_fpr_fcc2, + dwarf_fpr_fcc3, + dwarf_fpr_fcc4, + dwarf_fpr_fcc5, + dwarf_fpr_fcc6, + dwarf_fpr_fcc7, + dwarf_fpr_fcsr, + + // register name alias + dwarf_gpr_zero = dwarf_gpr_r0, + dwarf_gpr_ra = dwarf_gpr_r1, + dwarf_gpr_tp = 
dwarf_gpr_r2, + dwarf_gpr_sp = dwarf_gpr_r3, + dwarf_gpr_a0 = dwarf_gpr_r4, + dwarf_gpr_a1 = dwarf_gpr_r5, + dwarf_gpr_a2 = dwarf_gpr_r6, + dwarf_gpr_a3 = dwarf_gpr_r7, + dwarf_gpr_a4 = dwarf_gpr_r8, + dwarf_gpr_a5 = dwarf_gpr_r9, + dwarf_gpr_a6 = dwarf_gpr_r10, + dwarf_gpr_a7 = dwarf_gpr_r11, + dwarf_gpr_t0 = dwarf_gpr_r12, + dwarf_gpr_t1 = dwarf_gpr_r13, + dwarf_gpr_t2 = dwarf_gpr_r14, + dwarf_gpr_t3 = dwarf_gpr_r15, + dwarf_gpr_t4 = dwarf_gpr_r16, + dwarf_gpr_t5 = dwarf_gpr_r17, + dwarf_gpr_t6 = dwarf_gpr_r18, + dwarf_gpr_t7 = dwarf_gpr_r19, + dwarf_gpr_t8 = dwarf_gpr_r20, + dwarf_gpr_fp = dwarf_gpr_r22, + dwarf_gpr_s0 = dwarf_gpr_r23, + dwarf_gpr_s1 = dwarf_gpr_r24, + dwarf_gpr_s2 = dwarf_gpr_r25, + dwarf_gpr_s3 = dwarf_gpr_r26, + dwarf_gpr_s4 = dwarf_gpr_r27, + dwarf_gpr_s5 = dwarf_gpr_r28, + dwarf_gpr_s6 = dwarf_gpr_r29, + dwarf_gpr_s7 = dwarf_gpr_r30, + dwarf_gpr_s8 = dwarf_gpr_r31, + + dwarf_fpr_fa0 = dwarf_fpr_f0, + dwarf_fpr_fa1 = dwarf_fpr_f1, + dwarf_fpr_fa2 = dwarf_fpr_f2, + dwarf_fpr_fa3 = dwarf_fpr_f3, + dwarf_fpr_fa4 = dwarf_fpr_f4, + dwarf_fpr_fa5 = dwarf_fpr_f5, + dwarf_fpr_fa6 = dwarf_fpr_f6, + dwarf_fpr_fa7 = dwarf_fpr_f7, + dwarf_fpr_ft0 = dwarf_fpr_f8, + dwarf_fpr_ft1 = dwarf_fpr_f9, + dwarf_fpr_ft2 = dwarf_fpr_f10, + dwarf_fpr_ft3 = dwarf_fpr_f11, + dwarf_fpr_ft4 = dwarf_fpr_f12, + dwarf_fpr_ft5 = dwarf_fpr_f13, + dwarf_fpr_ft6 = dwarf_fpr_f14, + dwarf_fpr_ft7 = dwarf_fpr_f15, + dwarf_fpr_ft8 = dwarf_fpr_f16, + dwarf_fpr_ft9 = dwarf_fpr_f17, + dwarf_fpr_ft10 = dwarf_fpr_f18, + dwarf_fpr_ft11 = dwarf_fpr_f19, + dwarf_fpr_ft12 = dwarf_fpr_f20, + dwarf_fpr_ft13 = dwarf_fpr_f21, + dwarf_fpr_ft14 = dwarf_fpr_f22, + dwarf_fpr_ft15 = dwarf_fpr_f23, + dwarf_fpr_fs0 = dwarf_fpr_f24, + dwarf_fpr_fs1 = dwarf_fpr_f25, + dwarf_fpr_fs2 = dwarf_fpr_f26, + dwarf_fpr_fs3 = dwarf_fpr_f27, + dwarf_fpr_fs4 = dwarf_fpr_f28, + dwarf_fpr_fs5 = dwarf_fpr_f29, + dwarf_fpr_fs6 = dwarf_fpr_f30, + dwarf_fpr_fs7 = dwarf_fpr_f31, +}; + +} // namespace loongarch_dwarf + 
+#endif // LLDB_SOURCE_UTILITY_LOONGARCH_DWARF_REGISTERS_H diff --git a/lldb/test/Shell/ObjectFile/ELF/loongarch-arch.yaml b/lldb/test/Shell/ObjectFile/ELF/loongarch-arch.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d57831ce5789e0676129ee995b7d00f9b06c3318 --- /dev/null +++ b/lldb/test/Shell/ObjectFile/ELF/loongarch-arch.yaml @@ -0,0 +1,24 @@ +# RUN: yaml2obj --docnum=1 %s > %t32 +# RUN: yaml2obj --docnum=2 %s > %t64 +# RUN: lldb-test object-file %t32 | FileCheck --check-prefix=CHECK-LA32 %s +# RUN: lldb-test object-file %t64 | FileCheck --check-prefix=CHECK-LA64 %s + +# CHECK-LA32: Architecture: loongarch32-- + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_LOONGARCH +... + +# CHECK-LA64: Architecture: loongarch64-- + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_LOONGARCH +... diff --git a/lldb/test/Shell/ObjectFile/ELF/loongarch64-relocations.yaml b/lldb/test/Shell/ObjectFile/ELF/loongarch64-relocations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1be63870343bb2340f191180fca18d869fb5d1cf --- /dev/null +++ b/lldb/test/Shell/ObjectFile/ELF/loongarch64-relocations.yaml @@ -0,0 +1,45 @@ +# RUN: yaml2obj %s -o %t +# RUN: lldb-test object-file -contents %t | FileCheck %s + +## Test that relocations are correctly applied to the .debug_info section on loongarch64. 
+ +# CHECK: Name: .debug_info +# CHECK: Data: ( +## Before relocation: +## 0000: 00000000 00000000 00000000 +## After relocation: +# CHECK-NEXT: 0000: 34120000 78560000 00000000 +# CHECK-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH +Sections: + - Name: .debug_str + Type: SHT_PROGBITS + - Name: .debug_info + Type: SHT_PROGBITS + Content: 000000000000000000000000 + - Name: .rela.debug_info + Type: SHT_RELA + Info: .debug_info + Relocations: + - Offset: 0x0000000000000000 + Symbol: .debug_str + Type: R_LARCH_32 + Addend: 0x1234 + - Offset: 0x0000000000000004 + Symbol: .debug_str + Type: R_LARCH_64 + Addend: 0x5678 +Symbols: + - Name: .debug_str + Type: STT_SECTION + Section: .debug_str + - Name: .debug_info + Type: STT_SECTION + Section: .debug_info +... diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index b7026ef121654f9d9c5791555e1df8b042777104..0a564fee6f109906bd2a12712c2c8e2c6fb21d37 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -51,6 +51,7 @@ add_lldb_tool(lldb-server lldbVersion ${LLDB_PLUGINS} lldbPluginInstructionARM + lldbPluginInstructionLoongArch lldbPluginInstructionMIPS lldbPluginInstructionMIPS64 ${LLDB_SYSTEM_LIBS} diff --git a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp index b93e6b40dcd940a957ffdc2296940c1ce3022f20..581c43b722fcc1719c6fa75ba311a7a276c0d241 100644 --- a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp +++ b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp @@ -29,6 +29,11 @@ using HostObjectFile = ObjectFileELF; #include "Plugins/Instruction/ARM/EmulateInstructionARM.h" #endif +#if defined(__loongarch__) +#define LLDB_TARGET_LoongArch +#include "Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.h" +#endif + #if defined(__mips64__) || defined(mips64) || defined(__mips64) || \ defined(__MIPS64__) || 
defined(_M_MIPS64) #define LLDB_TARGET_MIPS64 @@ -52,6 +57,9 @@ llvm::Error SystemInitializerLLGS::Initialize() { #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Initialize(); #endif +#if defined(LLDB_TARGET_LoongArch) + EmulateInstructionLoongArch::Initialize(); +#endif #if defined(LLDB_TARGET_MIPS) || defined(LLDB_TARGET_MIPS64) EmulateInstructionMIPS::Initialize(); #endif @@ -68,6 +76,9 @@ void SystemInitializerLLGS::Terminate() { #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Terminate(); #endif +#if defined(LLDB_TARGET_LoongArch) + EmulateInstructionLoongArch::Terminate(); +#endif #if defined(LLDB_TARGET_MIPS) || defined(LLDB_TARGET_MIPS64) EmulateInstructionMIPS::Terminate(); #endif diff --git a/lldb/unittests/Instruction/CMakeLists.txt b/lldb/unittests/Instruction/CMakeLists.txt index 63d8298310239a550154bf3b4cd6344399adf05c..aa1706630dae33d4b777e4589fde1e3e4be194e4 100644 --- a/lldb/unittests/Instruction/CMakeLists.txt +++ b/lldb/unittests/Instruction/CMakeLists.txt @@ -1,12 +1,13 @@ -if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD) - add_lldb_unittest(EmulatorTests - TestAArch64Emulator.cpp - LINK_LIBS - lldbCore - lldbSymbol - lldbTarget - lldbPluginInstructionARM64 - LINK_COMPONENTS - Support - ${LLVM_TARGETS_TO_BUILD}) -endif() +add_lldb_unittest(EmulatorTests + TestAArch64Emulator.cpp + TestLoongArchEmulator.cpp + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbPluginInstructionARM64 + lldbPluginInstructionLoongArch + LINK_COMPONENTS + Support + ${LLVM_TARGETS_TO_BUILD} +) diff --git a/lldb/unittests/Instruction/TestLoongArchEmulator.cpp b/lldb/unittests/Instruction/TestLoongArchEmulator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..376af1f015905e9d68a87f1593706be0d1717319 --- /dev/null +++ b/lldb/unittests/Instruction/TestLoongArchEmulator.cpp @@ -0,0 +1,273 @@ +//===-- TestLoongArchEmulator.cpp -----------------------------------------===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/Address.h" +#include "lldb/Core/Disassembler.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Target/ExecutionContext.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/RegisterValue.h" +#include "gtest/gtest.h" + +#include "Plugins/Instruction/LoongArch/EmulateInstructionLoongArch.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_loongarch64.h" +#include "Plugins/Process/Utility/lldb-loongarch-register-enums.h" + +using namespace llvm; +using namespace lldb; +using namespace lldb_private; + +#define GEN_BCOND_TEST(bit, name, rj_val, rd_val_branched, rd_val_continued) \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##branched) { \ + testBcondBranch(this, name, true, rj_val, rd_val_branched); \ + } \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##continued) { \ + testBcondBranch(this, name, false, rj_val, rd_val_continued); \ + } + +#define GEN_BZCOND_TEST(bit, name, rj_val_branched, rj_val_continued) \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##branched) { \ + testBZcondBranch(this, name, true, rj_val_branched); \ + } \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##continued) { \ + testBZcondBranch(this, name, false, rj_val_continued); \ + } + +#define GEN_BCZCOND_TEST(bit, name, cj_val_branched, cj_val_continued) \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##branched) { \ + testBCZcondBranch(this, name, true, cj_val_branched); \ + } \ + TEST_F(LoongArch##bit##EmulatorTester, test##name##continued) { \ + testBCZcondBranch(this, name, false, cj_val_continued); \ + } + +struct LoongArch64EmulatorTester : public EmulateInstructionLoongArch, + testing::Test { + RegisterInfoPOSIX_loongarch64::GPR gpr; + 
RegisterInfoPOSIX_loongarch64::FPR fpr; + + LoongArch64EmulatorTester( + std::string triple = "loongarch64-unknown-linux-gnu") + : EmulateInstructionLoongArch(ArchSpec(triple)) { + EmulateInstruction::SetReadRegCallback(ReadRegisterCallback); + EmulateInstruction::SetWriteRegCallback(WriteRegisterCallback); + } + + static bool ReadRegisterCallback(EmulateInstruction *instruction, void *baton, + const RegisterInfo *reg_info, + RegisterValue ®_value) { + LoongArch64EmulatorTester *tester = + (LoongArch64EmulatorTester *)instruction; + uint32_t reg = reg_info->kinds[eRegisterKindLLDB]; + if (reg >= gpr_r0_loongarch && reg <= gpr_r31_loongarch) + reg_value.SetUInt(tester->gpr.gpr[reg], reg_info->byte_size); + else if (reg == gpr_orig_a0_loongarch) + reg_value.SetUInt(tester->gpr.orig_a0, reg_info->byte_size); + else if (reg == gpr_pc_loongarch) + reg_value.SetUInt(tester->gpr.csr_era, reg_info->byte_size); + else if (reg == gpr_badv_loongarch) + reg_value.SetUInt(tester->gpr.csr_badv, reg_info->byte_size); + else if (reg == fpr_first_loongarch + 32) + // fcc0 + reg_value.SetUInt(tester->fpr.fcc, reg_info->byte_size); + return true; + } + + static bool WriteRegisterCallback(EmulateInstruction *instruction, + void *baton, const Context &context, + const RegisterInfo *reg_info, + const RegisterValue ®_value) { + LoongArch64EmulatorTester *tester = + (LoongArch64EmulatorTester *)instruction; + uint32_t reg = reg_info->kinds[eRegisterKindLLDB]; + if (reg >= gpr_r0_loongarch && reg <= gpr_r31_loongarch) + tester->gpr.gpr[reg] = reg_value.GetAsUInt64(); + else if (reg == gpr_orig_a0_loongarch) + tester->gpr.orig_a0 = reg_value.GetAsUInt64(); + else if (reg == gpr_pc_loongarch) + tester->gpr.csr_era = reg_value.GetAsUInt64(); + else if (reg == gpr_badv_loongarch) + tester->gpr.csr_badv = reg_value.GetAsUInt64(); + return true; + } +}; + +// BEQ BNE BLT BGE BLTU BGEU +static uint32_t EncodeBcondType(uint32_t opcode, uint32_t rj, uint32_t rd, + uint32_t offs16) { + offs16 = 
offs16 & 0x0000ffff; + return opcode << 26 | offs16 << 10 | rj << 5 | rd; +} + +static uint32_t BEQ(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b010110, rj, rd, uint32_t(offs16)); +} + +static uint32_t BNE(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b010111, rj, rd, uint32_t(offs16)); +} + +static uint32_t BLT(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b011000, rj, rd, uint32_t(offs16)); +} + +static uint32_t BGE(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b011001, rj, rd, uint32_t(offs16)); +} + +static uint32_t BLTU(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b011010, rj, rd, uint32_t(offs16)); +} + +static uint32_t BGEU(uint32_t rj, uint32_t rd, int32_t offs16) { + return EncodeBcondType(0b011011, rj, rd, uint32_t(offs16)); +} + +// BEQZ BNEZ +static uint32_t EncodeBZcondType(uint32_t opcode, uint32_t rj, + uint32_t offs21) { + uint32_t offs20_16 = (offs21 & 0x001f0000) >> 16; + uint32_t offs15_0 = offs21 & 0x0000ffff; + return opcode << 26 | offs15_0 << 10 | rj << 5 | offs20_16; +} + +static uint32_t BEQZ(uint32_t rj, int32_t offs21) { + return EncodeBZcondType(0b010000, rj, uint32_t(offs21)); +} + +static uint32_t BNEZ(uint32_t rj, int32_t offs21) { + return EncodeBZcondType(0b010001, rj, uint32_t(offs21)); +} + +// BCEQZ BCNEZ +static uint32_t EncodeBCZcondType(uint32_t opcode, uint8_t cj, + uint32_t offs21) { + uint32_t offs20_16 = (offs21 & 0x001f0000) >> 16; + uint32_t offs15_0 = offs21 & 0x0000ffff; + return (opcode >> 2) << 26 | offs15_0 << 10 | (opcode & 0b11) << 8 | cj << 5 | + offs20_16; +} + +static uint32_t BCEQZ(uint8_t cj, int32_t offs21) { + return EncodeBCZcondType(0b01001000, cj, uint32_t(offs21)); +} + +static uint32_t BCNEZ(uint8_t cj, int32_t offs21) { + return EncodeBCZcondType(0b01001001, cj, uint32_t(offs21)); +} + +using EncoderBcond = uint32_t (*)(uint32_t rj, uint32_t rd, int32_t offs16); +using 
EncoderBZcond = uint32_t (*)(uint32_t rj, int32_t offs21); +using EncoderBCZcond = uint32_t (*)(uint8_t cj, int32_t offs21); + +TEST_F(LoongArch64EmulatorTester, testJIRL) { + bool success = false; + addr_t old_pc = 0x12000600; + WritePC(old_pc); + // JIRL r1, r12, 0x10 + // | 31 26 | 25 15 | 9 5 | 4 0 | + // | 0 1 0 0 1 1 | 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 | 0 1 1 0 0 | 0 0 0 0 1 | + uint32_t inst = 0b01001100000000000100000110000001; + uint32_t offs16 = 0x10; + gpr.gpr[12] = 0x12000400; + ASSERT_TRUE(TestExecute(inst)); + auto r1 = gpr.gpr[1]; + auto pc = ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(r1, old_pc + 4); + ASSERT_EQ(pc, gpr.gpr[12] + (offs16 * 4)); +} + +TEST_F(LoongArch64EmulatorTester, testB) { + bool success = false; + addr_t old_pc = 0x12000600; + WritePC(old_pc); + // B 0x10010 + // | 31 26 | 25 10 | 9 0 | + // | 0 1 0 1 0 0 | 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 | 0 0 0 0 0 0 0 0 0 1 | + uint32_t inst = 0b01010000000000000100000000000001; + uint32_t offs26 = 0x10010; + ASSERT_TRUE(TestExecute(inst)); + auto pc = ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc + (offs26 * 4)); +} + +TEST_F(LoongArch64EmulatorTester, testBL) { + bool success = false; + addr_t old_pc = 0x12000600; + WritePC(old_pc); + // BL 0x10010 + // | 31 26 | 25 10 | 9 0 | + // | 0 1 0 1 0 1 | 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 | 0 0 0 0 0 0 0 0 0 1 | + uint32_t inst = 0b01010100000000000100000000000001; + uint32_t offs26 = 0x10010; + ASSERT_TRUE(TestExecute(inst)); + auto r1 = gpr.gpr[1]; + auto pc = ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(r1, old_pc + 4); + ASSERT_EQ(pc, old_pc + (offs26 * 4)); +} + +static void testBcondBranch(LoongArch64EmulatorTester *tester, + EncoderBcond encoder, bool branched, + uint64_t rj_val, uint64_t rd_val) { + bool success = false; + addr_t old_pc = 0x12000600; + tester->WritePC(old_pc); + tester->gpr.gpr[12] = rj_val; + tester->gpr.gpr[13] = rd_val; + // b r12, r13, (-256) + uint32_t inst = encoder(12, 13, -256); + 
ASSERT_TRUE(tester->TestExecute(inst)); + auto pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc + (branched ? (-256 * 4) : 4)); +} + +static void testBZcondBranch(LoongArch64EmulatorTester *tester, + EncoderBZcond encoder, bool branched, + uint64_t rj_val) { + bool success = false; + addr_t old_pc = 0x12000600; + tester->WritePC(old_pc); + tester->gpr.gpr[4] = rj_val; + // bz r4, (-256) + uint32_t inst = encoder(4, -256); + ASSERT_TRUE(tester->TestExecute(inst)); + auto pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc + (branched ? (-256 * 4) : 4)); +} + +static void testBCZcondBranch(LoongArch64EmulatorTester *tester, + EncoderBCZcond encoder, bool branched, + uint32_t cj_val) { + bool success = false; + addr_t old_pc = 0x12000600; + tester->WritePC(old_pc); + tester->fpr.fcc = cj_val; + // bcz fcc0, 256 + uint32_t inst = encoder(0, 256); + ASSERT_TRUE(tester->TestExecute(inst)); + auto pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc + (branched ? (256 * 4) : 4)); +} + +GEN_BCOND_TEST(64, BEQ, 1, 1, 0) +GEN_BCOND_TEST(64, BNE, 1, 0, 1) +GEN_BCOND_TEST(64, BLT, -2, 1, -3) +GEN_BCOND_TEST(64, BGE, -2, -3, 1) +GEN_BCOND_TEST(64, BLTU, -2, -1, 1) +GEN_BCOND_TEST(64, BGEU, -2, 1, -1) +GEN_BZCOND_TEST(64, BEQZ, 0, 1) +GEN_BZCOND_TEST(64, BNEZ, 1, 0) +GEN_BCZCOND_TEST(64, BCEQZ, 0, 1) +GEN_BCZCOND_TEST(64, BCNEZ, 1, 0) diff --git a/llvm-build/Makefile b/llvm-build/Makefile index ecb982228b0ecbb4d4dca3e65e1b2e9e62087551..b6b1bae7d9fcf1d6b0dcccf51467a2e9025d5026 100644 --- a/llvm-build/Makefile +++ b/llvm-build/Makefile @@ -83,6 +83,9 @@ else ifeq ($(ARCH),x86_64) ARCH_CFLAGS = else +ifeq ($(ARCH),loongarch64) +ARCH_CFLAGS = +else $(warning *** warning: ARCH $(ARCH) has not been tested yet, use with cautions!) 
ARCH_CFLAGS = endif @@ -103,6 +106,9 @@ else ifeq ($(ARCH),x86_64) CFLAGS = -march=x86-64 -O2 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack else +ifeq ($(ARCH),loongarch64) +CFLAGS = -march=loongarch64 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack +else CFLAGS = -march=armv7-a -O2 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack endif endif diff --git a/llvm-build/build.py b/llvm-build/build.py index ee12969b8e24e5aed27446f043eecf5cbe131618..0bc804ad9807f4a8190abacbde2e318d29108859 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -59,6 +59,7 @@ class BuildConfig(): self.no_build_riscv64 = args.skip_build or args.no_build_riscv64 self.no_build_mipsel = args.skip_build or args.no_build_mipsel self.no_build_x86_64 = args.skip_build or args.no_build_x86_64 + self.no_build_loongarch64 = args.skip_build or args.no_build_loongarch64 self.build_ncurses = args.build_ncurses self.build_libedit = args.build_libedit self.build_lldb_static = args.build_lldb_static @@ -182,6 +183,12 @@ class BuildConfig(): action='store_true', default=False, help='Omit build os target: x86_64.') + + parser.add_argument( + '--no-build-loongarch64', + action='store_true', + default=False, + help='Omit build os target: loongarch64.') parser.add_argument( '--no-lto', @@ -1165,7 +1172,6 @@ class SysrootComposer(BuildUtils): self.build_musl_libs(product_name, target_cpu, target_name, multi_lib_dir, sysroot_multi_lib_dir, ld_musl_lib, gn_args) - def install_linux_headers(self, arch, target): dir_suffix = arch if arch == 'x86_64': @@ -1295,7 +1301,8 @@ class LlvmLibs(BuildUtils): ('riscv64', self.open_ohos_triple('riscv64'), '', ''), ('mipsel', self.open_ohos_triple('mipsel'), '', ''), ('mipsel', self.open_ohos_triple('mipsel'), '-mnan=legacy', 'nanlegacy'), - ('x86_64', self.open_ohos_triple('x86_64'), '', ''),] + ('x86_64', self.open_ohos_triple('x86_64'), '', ''), + 
('loongarch64', self.open_ohos_triple('loongarch64'), '', '')] cc = os.path.join(llvm_install, 'bin', 'clang') cxx = os.path.join(llvm_install, 'bin', 'clang++') @@ -1329,7 +1336,8 @@ class LlvmLibs(BuildUtils): llvm_path = self.merge_out_path('llvm_make') arch_list = [self.liteos_triple('arm'), self.open_ohos_triple('arm'), self.open_ohos_triple('aarch64'), self.open_ohos_triple('riscv64'), - self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64')] + self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64'), + self.open_ohos_triple('loongarch64')] libcxx_ndk_install = self.merge_out_path('libcxx-ndk') self.check_create_dir(libcxx_ndk_install) diff --git a/llvm-build/build_musl.sh b/llvm-build/build_musl.sh index 7f7f44c774e2cb65e738a34608d8acec63f90bc3..39a0d23ac89363cfec0565bc91b237dc8e22311f 100755 --- a/llvm-build/build_musl.sh +++ b/llvm-build/build_musl.sh @@ -87,6 +87,9 @@ elif [ $TARGET_TRIPLE == "riscv64-linux-ohos" ]; then elif [ $TARGET_TRIPLE == "x86_64-linux-ohos" ]; then TARGET_USER="linux_user" TARGETS_PREFIX="x86_64" +elif [ $TARGET_TRIPLE == "loongarch64-linux-ohos" ]; then + TARGET_USER="linux_user" + TARGETS_PREFIX="loongarch64" else TARGET_USER="linux_user" TARGETS_PREFIX="aarch64" @@ -109,7 +112,7 @@ make musl_header_install_for_${TARGET_USER} CLANG="${CLANG_BIN_ROOT}/clang" TOPD # build musl_libs if ((make_libs == 1)); then if [ $TARGET_TRIPLE == "aarch64-linux-ohos" ] || [ $TARGET_TRIPLE == "riscv64-linux-ohos" ] || \ - [ $TARGET_TRIPLE == "x86_64-linux-ohos" ]; then + [ $TARGET_TRIPLE == "x86_64-linux-ohos" ] || [ $TARGET_TRIPLE == "loongarch64-linux-ohos" ]; then make CLANG="${CLANG_BIN_ROOT}/clang" TOPDIR=${TOPDIR} SYSROOTDIR=${OUT}/sysroot MUSLCOPYDIR=${OUT}/musl_build \ TARGETS=${TARGET_USER} TARGET=${TARGET_TRIPLE} ARCH=${TARGETS_PREFIX} -f Makefile else diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 34c1f19a9f95a1866b5778f079893b548f6927eb..90a31d2877c93eda255cee8a00426a32b094eebb 100644 --- 
a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -373,6 +373,7 @@ set(LLVM_ALL_TARGETS BPF Hexagon Lanai + LoongArch Mips MSP430 NVPTX diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 83512760d8dde240618647465b543f54d8dc55e2..6beb25017ee502aaa7f7b2fa263e66e49ebf1538 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -498,6 +498,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") set(LLVM_NATIVE_ARCH RISCV) elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") set(LLVM_NATIVE_ARCH M68k) +elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") + set(LLVM_NATIVE_ARCH LoongArch) else () message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 60d3f588d6f7e8b341b47b7b379a6b5be299b4b6..71abbf939f97f44425d1aea93d457d7b7ac41045 100644 --- a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -922,6 +922,30 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + # Do not check for __GLIBC__ because uclibc defines it too + sed 's/^ //' << EOF >$dummy.c + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + + # There is no features test macro for musl + # Follow the GNU's config.guess approach of + # checking the output of ldd + if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + echo "${UNAME_MACHINE}-unknown-linux-${LIBC}" + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index e3cc48d7e9ca8ae1767c52678b2ce518da390580..53de21837e4653771eb05c601444b60953859d80 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4929,13 +4929,30 @@ ARM's Thumb1 mode: - ``x``: A 32, 64, or 
128-bit floating-point/SIMD register in the ranges ``s0-s15``, ``d0-d7``, or ``q0-q3``, respectively. - Hexagon: - ``o``, ``v``: A memory address operand, treated the same as constraint ``m``, at the moment. - ``r``: A 32 or 64-bit register. +LoongArch: + +- ``f``: A floating-point register (if available). +- ``k``: A memory operand whose address is formed by a base register and + (optionally scaled) index register. +- ``l``: A signed 16-bit constant. +- ``m``: A memory operand whose address is formed by a base register and + offset that is suitable for use in instructions with the same addressing + mode as st.w and ld.w. +- ``I``: A signed 12-bit constant (for arithmetic instructions). +- ``J``: An immediate integer zero. +- ``K``: An unsigned 12-bit constant (for logic instructions). +- ``ZB``: An address that is held in a general-purpose register. The offset + is zero. +- ``ZC``: A memory operand whose address is formed by a base register and + offset that is suitable for use in instructions with the same addressing + mode as ll.w and sc.w. + MSP430: - ``r``: An 8 or 16-bit register. @@ -5160,6 +5177,10 @@ Hexagon: - ``I``: Print the letter 'i' if the operand is an integer constant, otherwise nothing. Used to print 'addi' vs 'add' instructions. +LoongArch: + +- ``z``: Print $zero register if operand is zero, otherwise print it normally. + MSP430: No additional modifiers. 
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index a021f862c4dbd7c79c02236aaaadbbe2731d61ba..99df3714da2b83c5a4002384637091b01c7c9f39 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -232,6 +232,9 @@ public: GNUABI64, GNUEABI, GNUEABIHF, + GNUF32, + GNUF64, + GNUSF, GNUX32, GNUILP32, CODE16, @@ -558,7 +561,9 @@ public: EnvironmentType Env = getEnvironment(); return Env == Triple::GNU || Env == Triple::GNUABIN32 || Env == Triple::GNUABI64 || Env == Triple::GNUEABI || - Env == Triple::GNUEABIHF || Env == Triple::GNUX32; + Env == Triple::GNUEABIHF || Env == Triple::GNUF32 || + Env == Triple::GNUF64 || Env == Triple::GNUSF || + Env == Triple::GNUX32; } bool isOSContiki() const { @@ -837,10 +842,14 @@ public: : PointerWidth == 64; } + /// Tests whether the target is 32-bit LoongArch. + bool isLoongArch32() const { return getArch() == Triple::loongarch32; } + + /// Tests whether the target is 64-bit LoongArch. + bool isLoongArch64() const { return getArch() == Triple::loongarch64; } + /// Tests whether the target is LoongArch (32- and 64-bit). - bool isLoongArch() const { - return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64; - } + bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); } /// Tests whether the target is MIPS 32-bit (little and big endian). bool isMIPS32() const { diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 234c946b2014b812c789004bf2c163b594371382..d531052a42b0279231095e7eaec0efd24da91038 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -911,22 +911,20 @@ enum { // LoongArch Specific e_flags enum : unsigned { - // Reference: https://github.com/loongson/LoongArch-Documentation. - // The last commit hash (main branch) is - // 99016636af64d02dee05e39974d4c1e55875c45b. 
- // Note that there is an open PR - // https://github.com/loongson/LoongArch-Documentation/pull/47 - // talking about using 0x1, 0x2, 0x3 for ILP32S/F/D and use EI_CLASS to - // distinguish LP64 and ILP32. If this PR get merged, we will update - // the definition here. - // Base ABI Types. - EF_LOONGARCH_BASE_ABI_LP64S = 0x1, // LP64 soft-float ABI - EF_LOONGARCH_BASE_ABI_LP64F = 0x2, // LP64 single-float ABI - EF_LOONGARCH_BASE_ABI_LP64D = 0x3, // LP64 double-float ABI - EF_LOONGARCH_BASE_ABI_ILP32S = 0x5, // ILP32 soft-float ABI - EF_LOONGARCH_BASE_ABI_ILP32F = 0x6, // ILP32 single-float ABI - EF_LOONGARCH_BASE_ABI_ILP32D = 0x7, // ILP32 double-float ABI - EF_LOONGARCH_BASE_ABI_MASK = 0x7, // Mask for selecting base ABI + // Definitions from LoongArch ELF psABI v2.01. + // Reference: https://github.com/loongson/LoongArch-Documentation + // (commit hash 296de4def055c871809068e0816325a4ac04eb12) + + // Base ABI Modifiers + EF_LOONGARCH_ABI_SOFT_FLOAT = 0x1, + EF_LOONGARCH_ABI_SINGLE_FLOAT = 0x2, + EF_LOONGARCH_ABI_DOUBLE_FLOAT = 0x3, + EF_LOONGARCH_ABI_MODIFIER_MASK = 0x7, + + // Object file ABI versions + EF_LOONGARCH_OBJABI_V0 = 0x0, + EF_LOONGARCH_OBJABI_V1 = 0x40, + EF_LOONGARCH_OBJABI_MASK = 0xC0, }; // ELF Relocation types for LoongArch diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def index 8cbfe2fe4235bf1b7fa9de80b8760a89312f6c1f..02bce3c71712743cb2951b0df9a6c304dcd54c32 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -60,3 +60,61 @@ ELF_RELOC(R_LARCH_SUB32, 55) ELF_RELOC(R_LARCH_SUB64, 56) ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57) ELF_RELOC(R_LARCH_GNU_VTENTRY, 58) + +// Relocs whose processing do not require a stack machine. 
+// +// Spec addition: https://github.com/loongson/LoongArch-Documentation/pull/57 +// Binutils commit 6d13722a97cee3fd397e116bde3bcedbb1e220be +// and commit 9801120721c3a702ce3bd50433ef920f92a83502 +ELF_RELOC(R_LARCH_B16, 64) +ELF_RELOC(R_LARCH_B21, 65) +ELF_RELOC(R_LARCH_B26, 66) +ELF_RELOC(R_LARCH_ABS_HI20, 67) +ELF_RELOC(R_LARCH_ABS_LO12, 68) +ELF_RELOC(R_LARCH_ABS64_LO20, 69) +ELF_RELOC(R_LARCH_ABS64_HI12, 70) +ELF_RELOC(R_LARCH_PCALA_HI20, 71) +ELF_RELOC(R_LARCH_PCALA_LO12, 72) +ELF_RELOC(R_LARCH_PCALA64_LO20, 73) +ELF_RELOC(R_LARCH_PCALA64_HI12, 74) +ELF_RELOC(R_LARCH_GOT_PC_HI20, 75) +ELF_RELOC(R_LARCH_GOT_PC_LO12, 76) +ELF_RELOC(R_LARCH_GOT64_PC_LO20, 77) +ELF_RELOC(R_LARCH_GOT64_PC_HI12, 78) +ELF_RELOC(R_LARCH_GOT_HI20, 79) +ELF_RELOC(R_LARCH_GOT_LO12, 80) +ELF_RELOC(R_LARCH_GOT64_LO20, 81) +ELF_RELOC(R_LARCH_GOT64_HI12, 82) +ELF_RELOC(R_LARCH_TLS_LE_HI20, 83) +ELF_RELOC(R_LARCH_TLS_LE_LO12, 84) +ELF_RELOC(R_LARCH_TLS_LE64_LO20, 85) +ELF_RELOC(R_LARCH_TLS_LE64_HI12, 86) +ELF_RELOC(R_LARCH_TLS_IE_PC_HI20, 87) +ELF_RELOC(R_LARCH_TLS_IE_PC_LO12, 88) +ELF_RELOC(R_LARCH_TLS_IE64_PC_LO20, 89) +ELF_RELOC(R_LARCH_TLS_IE64_PC_HI12, 90) +ELF_RELOC(R_LARCH_TLS_IE_HI20, 91) +ELF_RELOC(R_LARCH_TLS_IE_LO12, 92) +ELF_RELOC(R_LARCH_TLS_IE64_LO20, 93) +ELF_RELOC(R_LARCH_TLS_IE64_HI12, 94) +ELF_RELOC(R_LARCH_TLS_LD_PC_HI20, 95) +ELF_RELOC(R_LARCH_TLS_LD_HI20, 96) +ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) +ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) +ELF_RELOC(R_LARCH_32_PCREL, 99) +ELF_RELOC(R_LARCH_RELAX, 100) + +// Relocs added in ELF for the LoongArchâ„¢ Architecture v20230519, part of the +// v2.10 LoongArch ABI specs. 
+// +// Spec addition: https://github.com/loongson/la-abi-specs/pull/1 +// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138 +ELF_RELOC(R_LARCH_DELETE, 101) +ELF_RELOC(R_LARCH_ALIGN, 102) +ELF_RELOC(R_LARCH_PCREL20_S2, 103) +ELF_RELOC(R_LARCH_CFA, 104) +ELF_RELOC(R_LARCH_ADD6, 105) +ELF_RELOC(R_LARCH_SUB6, 106) +ELF_RELOC(R_LARCH_ADD_ULEB128, 107) +ELF_RELOC(R_LARCH_SUB_ULEB128, 108) +ELF_RELOC(R_LARCH_64_PCREL, 109) diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6d4f6222af4478b7b00d48dcbc2908e92cc19570..226601b204c5434fe53e803c25a436005065d9d7 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) || defined(__riscv) + defined(__wasm__) || defined(__riscv) || defined(__loongarch__) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h new file mode 100644 index 0000000000000000000000000000000000000000..4d7655c4b988b74de745103f8f7d469762cf9290 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_loongarch.h @@ -0,0 +1,39 @@ +//===-- ELF_loongarch.h - JIT link functions for ELF/loongarch -*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/loongarch. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_LOONGARCH_H +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_LOONGARCH_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/loongarch relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> +createLinkGraphFromELFObject_loongarch(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be an ELF loongarch object +/// file. +void link_ELF_loongarch(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_LOONGARCH_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h b/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h new file mode 100644 index 0000000000000000000000000000000000000000..fc98c7ee4818e15abd20a2be3051d53efbac1ff0 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/loongarch.h @@ -0,0 +1,399 @@ +//= loongarch.h - Generic JITLink loongarch edge kinds, utilities -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing loongarch objects. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_LOONGARCH_H +#define LLVM_EXECUTIONENGINE_JITLINK_LOONGARCH_H + +#include "TableManager.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" + +namespace llvm { +namespace jitlink { +namespace loongarch { + +/// Represents loongarch fixups. +enum EdgeKind_loongarch : Edge::Kind { + /// A plain 64-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint64 + /// + Pointer64 = Edge::FirstRelocation, + + /// A plain 32-bit pointer value relocation. + /// + /// Fixup expression: + /// Fixup <- Target + Addend : uint32 + /// + /// Errors: + /// - The target must reside in the low 32-bits of the address space, + /// otherwise an out-of-range error will be returned. + /// + Pointer32, + + /// A 26-bit PC-relative branch. + /// + /// Represents a PC-relative call or branch to a target within +/-128Mb. The + /// target must be 4-byte aligned. + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) >> 2 : int26 + /// + /// Notes: + /// The '26' in the name refers to the number operand bits and follows the + /// naming convention used by the corresponding ELF relocations. Since the low + /// two bits must be zero (because of the 4-byte alignment of the target) the + /// operand is effectively a signed 28-bit number. + /// + /// Errors: + /// - The result of the unshifted part of the fixup expression must be + /// 4-byte aligned otherwise an alignment error will be returned. + /// - The result of the fixup expression must fit into an int26 otherwise an + /// out-of-range error will be returned. + /// + Branch26PCRel, + + /// A 32-bit delta. + /// + /// Delta from the fixup to the target. 
+ /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + Delta32, + + /// A 32-bit negative delta. + /// + /// Delta from the target back to the fixup. + /// + /// Fixup expression: + /// Fixup <- Fixup - Target + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + /// + NegDelta32, + + /// A 64-bit delta. + /// + /// Delta from the fixup to the target. + /// + /// Fixup expression: + /// Fixup <- Target - Fixup + Addend : int64 + /// + Delta64, + + /// The signed 20-bit delta from the fixup page to the page containing the + /// target. + /// + /// Fixup expression: + /// Fixup <- (((Target + Addend + ((Target + Addend) & 0x800)) & ~0xfff) + // - (Fixup & ~0xfff)) >> 12 : int20 + /// + /// Notes: + /// For PCALAU12I fixups. + /// + /// Errors: + /// - The result of the fixup expression must fit into an int20 otherwise an + /// out-of-range error will be returned. + /// + Page20, + + /// The 12-bit offset of the target within its page. + /// + /// Typically used to fix up ADDI/LD_W/LD_D immediates. + /// + /// Fixup expression: + /// Fixup <- ((Target + Addend) >> Shift) & 0xfff : int12 + /// + PageOffset12, + + /// A GOT entry getter/constructor, transformed to Page20 pointing at the GOT + /// entry for the original target. + /// + /// Indicates that this edge should be transformed into a Page20 targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist. + /// + /// Edges of this kind are usually handled by a GOT/PLT builder pass inserted + /// by default. 
+ /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToPage20, + + /// A GOT entry getter/constructor, transformed to Pageoffset12 pointing at + /// the GOT entry for the original target. + /// + /// Indicates that this edge should be transformed into a PageOffset12 + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. + /// + /// Edges of this kind are usually handled by a GOT/PLT builder pass inserted + /// by default. + /// + /// Fixup expression: + /// NONE + /// + RequestGOTAndTransformToPageOffset12, +}; + +/// Returns a string name for the given loongarch edge. For debugging purposes +/// only. +const char *getEdgeKindName(Edge::Kind K); + +// Returns extract bits Val[Hi:Lo]. +inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) { + return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo; +} + +/// Apply fixup expression for edge to block content. 
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
+  using namespace support;
+
+  char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+  char *FixupPtr = BlockWorkingMem + E.getOffset();
+  uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue();
+  uint64_t TargetAddress = E.getTarget().getAddress().getValue();
+  int64_t Addend = E.getAddend();
+
+  switch (E.getKind()) {
+  case Pointer64:
+    *(ulittle64_t *)FixupPtr = TargetAddress + Addend;
+    break;
+  case Pointer32: {
+    uint64_t Value = TargetAddress + Addend;
+    if (Value > std::numeric_limits<uint32_t>::max())
+      return makeTargetOutOfRangeError(G, B, E);
+    *(ulittle32_t *)FixupPtr = Value;
+    break;
+  }
+  case Branch26PCRel: {
+    int64_t Value = TargetAddress - FixupAddress + Addend;
+
+    if (!isInt<28>(Value))
+      return makeTargetOutOfRangeError(G, B, E);
+
+    if (!isShiftedInt<26, 2>(Value))
+      return makeAlignmentError(orc::ExecutorAddr(FixupAddress), Value, 4, E);
+
+    uint32_t RawInstr = *(little32_t *)FixupPtr;
+    uint32_t Imm = static_cast<uint32_t>(Value >> 2);
+    uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10;
+    uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16);
+    *(little32_t *)FixupPtr = RawInstr | Imm15_0 | Imm25_16;
+    break;
+  }
+  case Delta32: {
+    int64_t Value = TargetAddress - FixupAddress + Addend;
+
+    if (!isInt<32>(Value))
+      return makeTargetOutOfRangeError(G, B, E);
+    *(little32_t *)FixupPtr = Value;
+    break;
+  }
+  case NegDelta32: {
+    int64_t Value = FixupAddress - TargetAddress + Addend;
+    if (!isInt<32>(Value))
+      return makeTargetOutOfRangeError(G, B, E);
+    *(little32_t *)FixupPtr = Value;
+    break;
+  }
+  case Delta64:
+    *(little64_t *)FixupPtr = TargetAddress - FixupAddress + Addend;
+    break;
+  case Page20: {
+    uint64_t Target = TargetAddress + Addend;
+    uint64_t TargetPage =
+        (Target + (Target & 0x800)) & ~static_cast<uint64_t>(0xfff);
+    uint64_t PCPage = FixupAddress & ~static_cast<uint64_t>(0xfff);
+
+    int64_t PageDelta = TargetPage - PCPage;
+    if (!isInt<32>(PageDelta))
+      return makeTargetOutOfRangeError(G, B, E);
+
+    uint32_t RawInstr = *(little32_t *)FixupPtr;
+    uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5;
+    *(little32_t *)FixupPtr = RawInstr | Imm31_12;
+    break;
+  }
+  case PageOffset12: {
+    uint64_t TargetOffset = (TargetAddress + Addend) & 0xfff;
+
+    uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+    uint32_t Imm11_0 = TargetOffset << 10;
+    *(ulittle32_t *)FixupPtr = RawInstr | Imm11_0;
+    break;
+  }
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        " unsupported edge kind " + getEdgeKindName(E.getKind()));
+  }
+
+  return Error::success();
+}
+
+/// loongarch null pointer content.
+extern const char NullPointerContent[8];
+inline ArrayRef<char> getGOTEntryBlockContent(LinkGraph &G) {
+  return {reinterpret_cast<const char *>(NullPointerContent),
+          G.getPointerSize()};
+}
+
+/// loongarch stub content.
+///
+/// Contains the instruction sequence for an indirect jump via an in-memory
+/// pointer:
+///   pcalau12i $t8, %page20(ptr)
+///   ld.[w/d]  $t8, %pageoff12(ptr)
+///   jr        $t8
+constexpr size_t StubEntrySize = 12;
+extern const uint8_t LA64StubContent[StubEntrySize];
+extern const uint8_t LA32StubContent[StubEntrySize];
+inline ArrayRef<char> getStubBlockContent(LinkGraph &G) {
+  auto StubContent =
+      G.getPointerSize() == 8 ? LA64StubContent : LA32StubContent;
+  return {reinterpret_cast<const char *>(StubContent), StubEntrySize};
+}
+
+/// Creates a new pointer block in the given section and returns an
+/// Anonymous symobl pointing to it.
+///
+/// If InitialTarget is given then an Pointer64 relocation will be added to the
+/// block pointing at InitialTarget.
+/// +/// The pointer block will have the following default values: +/// alignment: PointerSize +/// alignment-offset: 0 +inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection, + Symbol *InitialTarget = nullptr, + uint64_t InitialAddend = 0) { + auto &B = G.createContentBlock(PointerSection, getGOTEntryBlockContent(G), + orc::ExecutorAddr(), G.getPointerSize(), 0); + if (InitialTarget) + B.addEdge(G.getPointerSize() == 8 ? Pointer64 : Pointer32, 0, + *InitialTarget, InitialAddend); + return G.addAnonymousSymbol(B, 0, G.getPointerSize(), false, false); +} + +/// Create a jump stub that jumps via the pointer at the given symbol and +/// an anonymous symbol pointing to it. Return the anonymous symbol. +inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, + Section &StubSection, + Symbol &PointerSymbol) { + Block &StubContentBlock = G.createContentBlock( + StubSection, getStubBlockContent(G), orc::ExecutorAddr(), 4, 0); + StubContentBlock.addEdge(Page20, 0, PointerSymbol, 0); + StubContentBlock.addEdge(PageOffset12, 4, PointerSymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true, false); +} + +/// Global Offset Table Builder. 
+class GOTTableManager : public TableManager<GOTTableManager> {
+public:
+  static StringRef getSectionName() { return "$__GOT"; }
+
+  bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+    Edge::Kind KindToSet = Edge::Invalid;
+    switch (E.getKind()) {
+    case RequestGOTAndTransformToPage20:
+      KindToSet = Page20;
+      break;
+    case RequestGOTAndTransformToPageOffset12:
+      KindToSet = PageOffset12;
+      break;
+    default:
+      return false;
+    }
+    assert(KindToSet != Edge::Invalid &&
+           "Fell through switch, but no new kind to set");
+    DEBUG_WITH_TYPE("jitlink", {
+      dbgs() << "  Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+             << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+             << formatv("{0:x}", E.getOffset()) << ")\n";
+    });
+    E.setKind(KindToSet);
+    E.setTarget(getEntryForTarget(G, E.getTarget()));
+    return true;
+  }
+
+  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    return createAnonymousPointer(G, getGOTSection(G), &Target);
+  }
+
+private:
+  Section &getGOTSection(LinkGraph &G) {
+    if (!GOTSection)
+      GOTSection =
+          &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+    return *GOTSection;
+  }
+
+  Section *GOTSection = nullptr;
+};
+
+/// Procedure Linkage Table Builder.
+class PLTTableManager : public TableManager<PLTTableManager> {
+public:
+  PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {}
+
+  static StringRef getSectionName() { return "$__STUBS"; }
+
+  bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+    if (E.getKind() == Branch26PCRel && !E.getTarget().isDefined()) {
+      DEBUG_WITH_TYPE("jitlink", {
+        dbgs() << "  Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+               << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+               << formatv("{0:x}", E.getOffset()) << ")\n";
+      });
+      E.setTarget(getEntryForTarget(G, E.getTarget()));
+      return true;
+    }
+    return false;
+  }
+
+  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+    return createAnonymousPointerJumpStub(G, getStubsSection(G),
+                                          GOT.getEntryForTarget(G, Target));
+  }
+
+public:
+  Section &getStubsSection(LinkGraph &G) {
+    if (!StubsSection)
+      StubsSection =
+          &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+    return *StubsSection;
+  }
+
+  GOTTableManager &GOT;
+  Section *StubsSection = nullptr;
+};
+
+} // namespace loongarch
+} // namespace jitlink
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index c5c2780bc9ee5f96593d9a12785a45d5ea23d35b..3048547912788b7602229f46fcf1f876daa1cb1b 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -369,6 +369,46 @@ public:
                   JITTargetAddress PointersBlockTargetAddress,
                   unsigned NumStubs);
 };
 
+// @brief loongarch64 support.
+//
+// LoongArch 64 supports lazy JITing.
+class OrcLoongArch64 {
+public:
+  static constexpr unsigned PointerSize = 8;
+  static constexpr unsigned TrampolineSize = 16;
+  static constexpr unsigned StubSize = 16;
+  static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31;
+  static constexpr unsigned ResolverCodeSize = 0xc8;
+
+  /// Write the resolver code into the given memory.
The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. 
+ static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt index 5151f9125b9464f847df2e20ea60e21395c7f61e..468d663796ed43a1257c285cbae70b1c70a57ce6 100644 --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm) tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf) tablegen(LLVM IntrinsicsDirectX.h -gen-intrinsic-enums -intrinsic-prefix=dx) tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon) +tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch) tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips) tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm) tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 0a8d27aad58a2e0ba9fe1607f38618d83ec6c01a..65e5335168d6998d2bb55425ca3fde5d36e851df 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -252,6 +252,7 @@ public: Constraint_Unknown = 0, Constraint_es, Constraint_i, + Constraint_k, Constraint_m, Constraint_o, Constraint_v, @@ -269,6 +270,7 @@ public: Constraint_Uy, Constraint_X, Constraint_Z, + Constraint_ZB, Constraint_ZC, Constraint_Zy, @@ -428,6 +430,8 @@ public: return "es"; case InlineAsm::Constraint_i: return "i"; + case InlineAsm::Constraint_k: + return "k"; case InlineAsm::Constraint_m: return "m"; case InlineAsm::Constraint_o: @@ -460,6 +464,8 @@ public: return "X"; case InlineAsm::Constraint_Z: return "Z"; + case InlineAsm::Constraint_ZB: + return "ZB"; case 
InlineAsm::Constraint_ZC: return "ZC"; case InlineAsm::Constraint_Zy: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d46fa4fbf5b50f7cea3c6ae11ec737d7f70c65b3..76f2d66b74d68be3a646e29a3c667d0d4d6ba3ce 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2053,3 +2053,4 @@ include "llvm/IR/IntrinsicsRISCV.td" include "llvm/IR/IntrinsicsSPIRV.td" include "llvm/IR/IntrinsicsVE.td" include "llvm/IR/IntrinsicsDirectX.td" +include "llvm/IR/IntrinsicsLoongArch.td" diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td new file mode 100644 index 0000000000000000000000000000000000000000..5edce3c529e1217514b67f6ef3f1d4a9d8a8fc61 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -0,0 +1,124 @@ +//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics *- tablegen -*===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the LoongArch-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "loongarch" in { + +//===----------------------------------------------------------------------===// +// Atomics + +// T @llvm..T.

(any*, T, T, T imm); +class MaskedAtomicRMW + : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype], + [IntrArgMemOnly, NoCapture>, ImmArg>]>; + +// We define 32-bit and 64-bit variants of the above, where T stands for i32 +// or i64 respectively: +multiclass MaskedAtomicRMWIntrinsics { + // i32 @llvm..i32.

(any*, i32, i32, i32 imm); + def _i32 : MaskedAtomicRMW; + // i64 @llvm..i32.

(any*, i64, i64, i64 imm); + def _i64 : MaskedAtomicRMW; +} + +multiclass MaskedAtomicRMWFiveOpIntrinsics { + // TODO: Support cmpxchg on LA32. + // i64 @llvm..i64.

(any*, i64, i64, i64, i64 imm); + def _i64 : MaskedAtomicRMWFiveArg; +} + +defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics; +defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics; +defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics; + +// @llvm.loongarch.masked.cmpxchg.i64.

( +// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering) +defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; + +//===----------------------------------------------------------------------===// +// LoongArch BASE + +def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [ImmArg>, ImmArg>]>; +def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg>, ImmArg>]>; +def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; + +def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], + [ImmArg>]>; +def 
int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_i32_ty], + [ImmArg>]>; + +def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; + +def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; + +def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; +def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; + +def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; +def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; +} // TargetPrefix = "loongarch" diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index f50e34e1d6a5001b7a2e920895b1ff135a10fdc9..a9dfe137b150586080a30c3084fdcb47e536793c 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -56,6 +56,7 @@ class ELFObjectFileBase : public ObjectFile { SubtargetFeatures getMIPSFeatures() const; SubtargetFeatures getARMFeatures() const; SubtargetFeatures getRISCVFeatures() const; + SubtargetFeatures getLoongArchFeatures() const; StringRef getAMDGPUCPUName() const; diff --git a/llvm/include/llvm/Support/LoongArchTargetParser.def b/llvm/include/llvm/Support/LoongArchTargetParser.def new file mode 100644 index 
0000000000000000000000000000000000000000..b20d124953f882e49eeb3082f629160028a29397 --- /dev/null +++ b/llvm/include/llvm/Support/LoongArchTargetParser.def @@ -0,0 +1,23 @@ +#ifndef LOONGARCH_FEATURE +#define LOONGARCH_FEATURE(NAME, KIND) +#endif + +LOONGARCH_FEATURE("+64bit", FK_64BIT) +LOONGARCH_FEATURE("+f", FK_FP32) +LOONGARCH_FEATURE("+d", FK_FP64) +LOONGARCH_FEATURE("+lsx", FK_LSX) +LOONGARCH_FEATURE("+lasx", FK_LASX) +LOONGARCH_FEATURE("+lbt", FK_LBT) +LOONGARCH_FEATURE("+lvz", FK_LVZ) +LOONGARCH_FEATURE("+ual", FK_UAL) + +#undef LOONGARCH_FEATURE + +#ifndef LOONGARCH_ARCH +#define LOONGARCH_ARCH(NAME, KIND, FEATURES) +#endif + +LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) +LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) + +#undef LOONGARCH_ARCH diff --git a/llvm/include/llvm/Support/LoongArchTargetParser.h b/llvm/include/llvm/Support/LoongArchTargetParser.h new file mode 100644 index 0000000000000000000000000000000000000000..0251b174aac404826333566aa47a6482a3f3f9d8 --- /dev/null +++ b/llvm/include/llvm/Support/LoongArchTargetParser.h @@ -0,0 +1,77 @@ +//====-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise LoongArch hardware features +// such as CPU/ARCH and extension names. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_LOONGARCHTARGETPARSER_H
+#define LLVM_SUPPORT_LOONGARCHTARGETPARSER_H
+
+#include "llvm/ADT/Triple.h"
+#include <vector>
+
+namespace llvm {
+class StringRef;
+
+namespace LoongArch {
+
+enum FeatureKind : uint32_t {
+  // 64-bit ISA is available.
+  FK_64BIT = 1 << 1,
+
+  // Single-precision floating-point instructions are available.
+  FK_FP32 = 1 << 2,
+
+  // Double-precision floating-point instructions are available.
+  FK_FP64 = 1 << 3,
+
+  // Loongson SIMD Extension is available.
+  FK_LSX = 1 << 4,
+
+  // Loongson Advanced SIMD Extension is available.
+  FK_LASX = 1 << 5,
+
+  // Loongson Binary Translation Extension is available.
+  FK_LBT = 1 << 6,
+
+  // Loongson Virtualization Extension is available.
+  FK_LVZ = 1 << 7,
+
+  // Allow memory accesses to be unaligned.
+  FK_UAL = 1 << 8,
+};
+
+struct FeatureInfo {
+  StringRef Name;
+  FeatureKind Kind;
+};
+
+enum class ArchKind {
+#define LOONGARCH_ARCH(NAME, KIND, FEATURES) KIND,
+#include "LoongArchTargetParser.def"
+};
+
+struct ArchInfo {
+  StringRef Name;
+  ArchKind Kind;
+  uint32_t Features;
+};
+
+bool isValidArchName(StringRef Arch);
+bool getArchFeatures(StringRef Arch, std::vector<StringRef> &Features);
+bool isValidCPUName(StringRef TuneCPU);
+void fillValidCPUList(SmallVectorImpl<StringRef> &Values);
+StringRef getDefaultArch(bool Is64Bit);
+
+} // namespace LoongArch
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
new file mode 100644
index 0000000000000000000000000000000000000000..028844187584b236644a3283fa05a5b2602e828d
--- /dev/null
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -0,0 +1,77 @@
+//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise LoongArch hardware features +// such as CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H +#define LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H + +#include "llvm/TargetParser/Triple.h" +#include + +namespace llvm { +class StringRef; + +namespace LoongArch { + +enum FeatureKind : uint32_t { + // 64-bit ISA is available. + FK_64BIT = 1 << 1, + + // Single-precision floating-point instructions are available. + FK_FP32 = 1 << 2, + + // Double-precision floating-point instructions are available. + FK_FP64 = 1 << 3, + + // Loongson SIMD Extension is available. + FK_LSX = 1 << 4, + + // Loongson Advanced SIMD Extension is available. + FK_LASX = 1 << 5, + + // Loongson Binary Translation Extension is available. + FK_LBT = 1 << 6, + + // Loongson Virtualization Extension is available. + FK_LVZ = 1 << 7, + + // Allow memory accesses to be unaligned. 
+ FK_UAL = 1 << 8, +}; + +struct FeatureInfo { + StringRef Name; + FeatureKind Kind; +}; + +enum class ArchKind { +#define LOONGARCH_ARCH(NAME, KIND, FEATURES) KIND, +#include "LoongArchTargetParser.def" +}; + +struct ArchInfo { + StringRef Name; + ArchKind Kind; + uint32_t Features; +}; + +bool isValidArchName(StringRef Arch); +bool getArchFeatures(StringRef Arch, std::vector &Features); +bool isValidCPUName(StringRef TuneCPU); +void fillValidCPUList(SmallVectorImpl &Values); +StringRef getDefaultArch(bool Is64Bit); + +} // namespace LoongArch + +} // namespace llvm + +#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 76b10621541c476fae96e667da1f30bacdf12114..948bceaff286fb3d37447cab4949e696a1e6ccf4 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -418,6 +418,7 @@ module LLVM_Utils { textual header "Support/RISCVTargetParser.def" textual header "Support/TargetOpcodes.def" textual header "Support/X86TargetParser.def" + textual header "Support/LoongArchTargetParser.def" } // This part of the module is usable from both C and C++ code. 
diff --git a/llvm/lib/BinaryFormat/ELF.cpp b/llvm/lib/BinaryFormat/ELF.cpp index e2e601b6d90f900fdb5d4fc8fa5dc750df82d79f..dc8f3051a1495e28b703682e858c3dc42cb30279 100644 --- a/llvm/lib/BinaryFormat/ELF.cpp +++ b/llvm/lib/BinaryFormat/ELF.cpp @@ -197,6 +197,7 @@ uint16_t ELF::convertArchNameToEMachine(StringRef Arch) { .Case("bpf", EM_BPF) .Case("ve", EM_VE) .Case("csky", EM_CSKY) + .Case("loongarch", EM_LOONGARCH) .Default(EM_NONE); } @@ -561,6 +562,8 @@ StringRef ELF::convertEMachineToArchName(uint16_t EMachine) { return "ve"; case EM_CSKY: return "csky"; + case EM_LOONGARCH: + return "loongarch"; default: return "None"; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 195c0e6a836f06137b21c69e2c75d2a2396c8e43..c8c219d4461d13422019f68080f3e9c5d20166a9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9996,8 +9996,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { case ISD::ADD: SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); - if (!isConstantIntBuildVectorOrConstantInt(N0) && - isConstantIntBuildVectorOrConstantInt(N1)) { + if (!isa(N0) && isa(N1)) { uint64_t Offset = N.getConstantOperandVal(1); // Rewrite an ADD constant node into a DIExpression. 
Since we are diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 56308d7808622a8a3be1ce65336d9d37b9c25e8c..407ad034c18edcdfb537db705fa2b161b8fe5f11 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -289,6 +289,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TTypeEncoding = dwarf::DW_EH_PE_absptr; } break; + case Triple::loongarch32: + case Triple::loongarch64: + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + break; default: break; } diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index 87892c080af2578137c5e41fb19868106bbd3649..7b3730a7f16f50f1907beea71bd54c82c9f8aa5c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_component_library(LLVMJITLink ELF.cpp ELFLinkGraphBuilder.cpp ELF_aarch64.cpp + ELF_loongarch.cpp ELF_riscv.cpp ELF_x86_64.cpp @@ -28,6 +29,7 @@ add_llvm_component_library(LLVMJITLink # Architectures: aarch64.cpp + loongarch.cpp riscv.cpp x86_64.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp index eb98e4ba404187a93181009a1235872781dff12a..e71f356e76f19d509103e480f238374abe5fc796 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -14,6 +14,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" +#include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" #include "llvm/ExecutionEngine/JITLink/ELF_riscv.h" #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" #include "llvm/Object/ELF.h" @@ -67,6 
+68,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) { switch (*TargetMachineArch) { case ELF::EM_AARCH64: return createLinkGraphFromELFObject_aarch64(ObjectBuffer); + case ELF::EM_LOONGARCH: + return createLinkGraphFromELFObject_loongarch(ObjectBuffer); case ELF::EM_RISCV: return createLinkGraphFromELFObject_riscv(ObjectBuffer); case ELF::EM_X86_64: @@ -84,6 +87,10 @@ void link_ELF(std::unique_ptr G, case Triple::aarch64: link_ELF_aarch64(std::move(G), std::move(Ctx)); return; + case Triple::loongarch32: + case Triple::loongarch64: + link_ELF_loongarch(std::move(G), std::move(Ctx)); + return; case Triple::riscv32: case Triple::riscv64: link_ELF_riscv(std::move(G), std::move(Ctx)); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26f86163cb68c7074c688e1afe2dddb812a10cbb --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp @@ -0,0 +1,209 @@ +//===--- ELF_loongarch.cpp - JIT linker implementation for ELF/loongarch --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ELF/loongarch jit-link implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/loongarch.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" + +#include "EHFrameSupportImpl.h" +#include "ELFLinkGraphBuilder.h" +#include "JITLinkGeneric.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; +using namespace llvm::jitlink::loongarch; + +namespace { + +class ELFJITLinker_loongarch : public JITLinker { + friend class JITLinker; + +public: + ELFJITLinker_loongarch(std::unique_ptr Ctx, + std::unique_ptr G, + PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {} + +private: + Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { + return loongarch::applyFixup(G, B, E); + } +}; + +template +class ELFLinkGraphBuilder_loongarch : public ELFLinkGraphBuilder { +private: + static Expected + getRelocationKind(const uint32_t Type) { + using namespace loongarch; + switch (Type) { + case ELF::R_LARCH_64: + return Pointer64; + case ELF::R_LARCH_32: + return Pointer32; + case ELF::R_LARCH_32_PCREL: + return Delta32; + case ELF::R_LARCH_B26: + return Branch26PCRel; + case ELF::R_LARCH_PCALA_HI20: + return Page20; + case ELF::R_LARCH_PCALA_LO12: + return PageOffset12; + case ELF::R_LARCH_GOT_PC_HI20: + return RequestGOTAndTransformToPage20; + case ELF::R_LARCH_GOT_PC_LO12: + return RequestGOTAndTransformToPageOffset12; + } + + return make_error( + "Unsupported loongarch relocation:" + formatv("{0:d}: ", Type) + + object::getELFRelocationTypeName(ELF::EM_LOONGARCH, Type)); + } + + Error addRelocations() override { + LLVM_DEBUG(dbgs() << "Processing relocations:\n"); + + using Base = ELFLinkGraphBuilder; + using Self 
= ELFLinkGraphBuilder_loongarch; + for (const auto &RelSect : Base::Sections) + if (Error Err = Base::forEachRelocation(RelSect, this, + &Self::addSingleRelocation)) + return Err; + + return Error::success(); + } + + Error addSingleRelocation(const typename ELFT::Rela &Rel, + const typename ELFT::Shdr &FixupSect, + Block &BlockToFix) { + using Base = ELFLinkGraphBuilder; + + uint32_t SymbolIndex = Rel.getSymbol(false); + auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); + if (!ObjSymbol) + return ObjSymbol.takeError(); + + Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex); + if (!GraphSymbol) + return make_error( + formatv("Could not find symbol at given index, did you add it to " + "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}", + SymbolIndex, (*ObjSymbol)->st_shndx, + Base::GraphSymbols.size()), + inconvertibleErrorCode()); + + uint32_t Type = Rel.getType(false); + Expected Kind = getRelocationKind(Type); + if (!Kind) + return Kind.takeError(); + + int64_t Addend = Rel.r_addend; + auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; + Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); + Edge GE(*Kind, Offset, *GraphSymbol, Addend); + LLVM_DEBUG({ + dbgs() << " "; + printEdge(dbgs(), BlockToFix, GE, loongarch::getEdgeKindName(*Kind)); + dbgs() << "\n"; + }); + + BlockToFix.addEdge(std::move(GE)); + + return Error::success(); + } + +public: + ELFLinkGraphBuilder_loongarch(StringRef FileName, + const object::ELFFile &Obj, + const Triple T) + : ELFLinkGraphBuilder(Obj, std::move(T), FileName, + loongarch::getEdgeKindName) {} +}; + +Error buildTables_ELF_loongarch(LinkGraph &G) { + LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); + + GOTTableManager GOT; + PLTTableManager PLT(GOT); + visitExistingEdges(G, GOT, PLT); + return Error::success(); +} + +} // namespace + +namespace llvm { +namespace jitlink { + +Expected> +createLinkGraphFromELFObject_loongarch(MemoryBufferRef ObjectBuffer) { + 
LLVM_DEBUG({ + dbgs() << "Building jitlink graph for new input " + << ObjectBuffer.getBufferIdentifier() << "...\n"; + }); + + auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer); + if (!ELFObj) + return ELFObj.takeError(); + + if ((*ELFObj)->getArch() == Triple::loongarch64) { + auto &ELFObjFile = cast>(**ELFObj); + return ELFLinkGraphBuilder_loongarch( + (*ELFObj)->getFileName(), ELFObjFile.getELFFile(), + (*ELFObj)->makeTriple()) + .buildGraph(); + } + + assert((*ELFObj)->getArch() == Triple::loongarch32 && + "Invalid triple for LoongArch ELF object file"); + auto &ELFObjFile = cast>(**ELFObj); + return ELFLinkGraphBuilder_loongarch( + (*ELFObj)->getFileName(), ELFObjFile.getELFFile(), + (*ELFObj)->makeTriple()) + .buildGraph(); +} + +void link_ELF_loongarch(std::unique_ptr G, + std::unique_ptr Ctx) { + PassConfiguration Config; + const Triple &TT = G->getTargetTriple(); + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add eh-frame passses. + Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame")); + Config.PrePrunePasses.push_back( + EHFrameEdgeFixer(".eh_frame", G->getPointerSize(), Pointer32, Pointer64, + Delta32, Delta64, NegDelta32)); + Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame")); + + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/PLTStubs build pass. 
+ Config.PostPrunePasses.push_back(buildTables_ELF_loongarch); + } + + if (auto Err = Ctx->modifyPassConfig(*G, Config)) + return Ctx->notifyFailed(std::move(Err)); + + ELFJITLinker_loongarch::link(std::move(Ctx), std::move(G), std::move(Config)); +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/loongarch.cpp b/llvm/lib/ExecutionEngine/JITLink/loongarch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d1e44ec187cc80b47692c7ef7526063762ff7e3d --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/loongarch.cpp @@ -0,0 +1,60 @@ +//===--- loongarch.cpp - Generic JITLink loongarch edge kinds, utilities --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing loongarch objects. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/loongarch.h" + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { +namespace loongarch { + +const char NullPointerContent[8] = {0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}; + +const uint8_t LA64StubContent[StubEntrySize] = { + 0x14, 0x00, 0x00, 0x1a, // pcalau12i $t8, %page20(imm) + 0x94, 0x02, 0xc0, 0x28, // ld.d $t8, $t8, %pageoff12(imm) + 0x80, 0x02, 0x00, 0x4c // jr $t8 +}; + +const uint8_t LA32StubContent[StubEntrySize] = { + 0x14, 0x00, 0x00, 0x1a, // pcalau12i $t8, %page20(imm) + 0x94, 0x02, 0x80, 0x28, // ld.w $t8, $t8, %pageoff12(imm) + 0x80, 0x02, 0x00, 0x4c // jr $t8 +}; + +const char *getEdgeKindName(Edge::Kind K) { +#define KIND_NAME_CASE(K) \ + case K: \ + return #K; + + switch (K) { + KIND_NAME_CASE(Pointer64) + KIND_NAME_CASE(Pointer32) + KIND_NAME_CASE(Delta32) + KIND_NAME_CASE(NegDelta32) + KIND_NAME_CASE(Delta64) + KIND_NAME_CASE(Branch26PCRel) + KIND_NAME_CASE(Page20) + KIND_NAME_CASE(PageOffset12) + KIND_NAME_CASE(RequestGOTAndTransformToPage20) + KIND_NAME_CASE(RequestGOTAndTransformToPageOffset12) + default: + return getGenericEdgeKindName(K); + } +#undef KIND_NAME_CASE +} + +} // namespace loongarch +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 48aaab96e71ff491bbb0f00564f627eddbbc27ed..ddfb30500c7b50e112429f23508eb7e568ff4c37 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -250,6 +250,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { case Triple::x86: return CreateWithABI(EPC); + case Triple::loongarch64: + return CreateWithABI(EPC); + case Triple::mips: return CreateWithABI(EPC); diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp 
b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 38cab526704fcbf51cbb7fd52625a3fffcc9f2ac..ca9f04597eb048017187f4b6ccde51215b12bd4a 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -137,6 +137,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return CCMgrT::Create(ES, ErrorHandlerAddress); } + case Triple::loongarch64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + case Triple::mips: { typedef orc::LocalJITCompileCallbackManager CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); @@ -192,6 +197,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { orc::LocalIndirectStubsManager>(); }; + case Triple::loongarch64: + return []() { + return std::make_unique< + orc::LocalIndirectStubsManager>(); + }; + case Triple::mips: return [](){ return std::make_unique< diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 1926ef1ecc72becb756afd9259e576facecf6044..e4fcc39e8b4eef5d010417b41a6b6dc414e92351 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -667,6 +667,7 @@ Error LLJITBuilderState::prepareForConstruction() { if (!CreateObjectLinkingLayer) { auto &TT = JTMB->getTargetTriple(); if (TT.getArch() == Triple::riscv64 || + TT.getArch() == Triple::loongarch64 || (TT.isOSBinFormatMachO() && (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64))) { diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 20b655bdf4b108e6c9ee538890eb173a3e5eba03..c0a740d42dbde232497de81d1ec265f1c27a122a 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -119,6 +119,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, case Triple::x86: return 
LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + case Triple::loongarch64: + return LocalLazyCallThroughManager::Create( + ES, ErrorHandlerAddr); + case Triple::mips: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index da8aaad08cad332a803c2b98e03fbf453d99eac8..48dd0df804156d8423399149ec86091d68f4c7c0 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -1077,5 +1077,158 @@ void OrcRiscv64::writeIndirectStubsBlock( } } +void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + LLVM_DEBUG({ + dbgs() << "Writing resolver code to " + << formatv("{0:x16}", ResolverTargetAddress) << "\n"; + }); + + const uint32_t ResolverCode[] = { + 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78) + 0x29c00061, // 0x4: st.d $ra, $sp, 0 + 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8) + 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10) + 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18) + 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20) + 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28) + 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30) + 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38) + 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40) + 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48) + 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50) + 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58) + 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60) + 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68) + 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70) + 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78) + 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80) + 0x1c000004, // 0x48: pcaddu12i $a0, 0 + 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70) + 0x001501a5, // 0x50: move $a1, $t1 + 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4) + 
0x1c000006, // 0x58: pcaddu12i $a2, 0 + 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68) + 0x4c0000c1, // 0x60: jirl $ra, $a2, 0 + 0x0015008c, // 0x64: move $t0, $a0 + 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80) + 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78) + 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70) + 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68) + 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60) + 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58) + 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50) + 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48) + 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40) + 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38) + 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30) + 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28) + 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20) + 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18) + 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10) + 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8) + 0x28c00061, // 0xa8: ld.d $ra, $sp, 0 + 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88) + 0x4c000180, // 0xb0: jr $t0 + 0x00000000, // 0xb4: padding to align at 8 bytes + 0x01234567, // 0xb8: Lreentry_ctx_ptr: + 0xdeedbeef, // 0xbc: .dword 0 + 0x98765432, // 0xc0: Lreentry_fn_ptr: + 0xcafef00d, // 0xc4: .dword 0 + }; + + const unsigned ReentryCtxAddrOffset = 0xb8; + const unsigned ReentryFnAddrOffset = 0xc0; + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); +} + +void OrcLoongArch64::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { + + LLVM_DEBUG({ + dbgs() << "Writing trampoline code to " + << formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n"; + }); + + unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); + + 
memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { + uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000; + uint32_t Lo12 = OffsetToPtr - Hi20; + Trampolines[4 * I + 0] = + 0x1c00000c | + (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr) + Trampolines[4 * I + 1] = + 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) + Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0 + Trampolines[4 * I + 3] = 0x0; // padding + } +} + +void OrcLoongArch64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1 + // ld.d $t0, $t0, %pc_lo12(ptr1) + // jr $t0 ; Jump to resolver + // .dword 0 ; Pad to 16 bytes + // stub2: + // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2 + // ld.d $t0, $t0, %pc_lo12(ptr2) + // jr $t0 ; Jump to resolver + // .dword 0 ; Pad to 16 bytes + // ... + // + // .section __orc_ptrs + // ptr1: + // .dword 0x0 + // ptr2: + // .dword 0x0 + // ... 
+ LLVM_DEBUG({ + dbgs() << "Writing stubs code to " + << formatv("{0:x16}", StubsBlockTargetAddress) << "\n"; + }); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + + for (unsigned I = 0; I < NumStubs; ++I) { + uint64_t PtrDisplacement = + PointersBlockTargetAddress - StubsBlockTargetAddress; + uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000; + uint32_t Lo12 = PtrDisplacement - Hi20; + Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff) + << 5); // pcaddu12i $t0, %pc_hi20(Lptr) + Stub[4 * I + 1] = + 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) + Stub[4 * I + 2] = 0x4c000180; // jr $t0 + Stub[4 * I + 3] = 0x0; // padding + PointersBlockTargetAddress += PointerSize; + StubsBlockTargetAddress += StubSize; + } +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 3139ca3c6dcbb29022fb5b7b5eeb40bf1d468c30..ac47ecd7e6cae33bb695ab5fd7aba98031753290 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -230,6 +230,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { break; case ELF::EM_BPF: break; + case ELF::EM_LOONGARCH: + return ELF::R_LARCH_RELATIVE; default: break; } diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 1f342e55e77fe7fa4dc0aad6386e9ef8c9a27fed..5b4380e4738612fc6147dee98ca3e1d0dec24966 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -341,6 +341,24 @@ SubtargetFeatures ELFObjectFileBase::getRISCVFeatures() const { return Features; } +SubtargetFeatures ELFObjectFileBase::getLoongArchFeatures() const { + SubtargetFeatures Features; + + switch (getPlatformFlags() & ELF::EF_LOONGARCH_ABI_MODIFIER_MASK) { + case ELF::EF_LOONGARCH_ABI_SOFT_FLOAT: + break; + case 
ELF::EF_LOONGARCH_ABI_DOUBLE_FLOAT: + Features.AddFeature("d"); + // D implies F according to LoongArch ISA spec. + [[fallthrough]]; + case ELF::EF_LOONGARCH_ABI_SINGLE_FLOAT: + Features.AddFeature("f"); + break; + } + + return Features; +} + SubtargetFeatures ELFObjectFileBase::getFeatures() const { switch (getEMachine()) { case ELF::EM_MIPS: @@ -349,6 +367,8 @@ SubtargetFeatures ELFObjectFileBase::getFeatures() const { return getARMFeatures(); case ELF::EM_RISCV: return getRISCVFeatures(); + case ELF::EM_LOONGARCH: + return getLoongArchFeatures(); default: return SubtargetFeatures(); } diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index e14301663df3abf9a76e6d851f627cd5b1f997ce..d1726053e7d57f9029d4da048093e0461a6a9c42 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -511,6 +511,58 @@ static uint64_t resolveCSKY(uint64_t Type, uint64_t Offset, uint64_t S, } } +static bool supportsLoongArch(uint64_t Type) { + switch (Type) { + case ELF::R_LARCH_NONE: + case ELF::R_LARCH_32: + case ELF::R_LARCH_32_PCREL: + case ELF::R_LARCH_64: + case ELF::R_LARCH_ADD8: + case ELF::R_LARCH_SUB8: + case ELF::R_LARCH_ADD16: + case ELF::R_LARCH_SUB16: + case ELF::R_LARCH_ADD32: + case ELF::R_LARCH_SUB32: + case ELF::R_LARCH_ADD64: + case ELF::R_LARCH_SUB64: + return true; + default: + return false; + } +} + +static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t LocData, int64_t Addend) { + switch (Type) { + case ELF::R_LARCH_NONE: + return LocData; + case ELF::R_LARCH_32: + return (S + Addend) & 0xFFFFFFFF; + case ELF::R_LARCH_32_PCREL: + return (S + Addend - Offset) & 0xFFFFFFFF; + case ELF::R_LARCH_64: + return S + Addend; + case ELF::R_LARCH_ADD8: + return (LocData + (S + Addend)) & 0xFF; + case ELF::R_LARCH_SUB8: + return (LocData - (S + Addend)) & 0xFF; + case ELF::R_LARCH_ADD16: + return (LocData + (S + Addend)) & 0xFFFF; + case ELF::R_LARCH_SUB16: + 
return (LocData - (S + Addend)) & 0xFFFF; + case ELF::R_LARCH_ADD32: + return (LocData + (S + Addend)) & 0xFFFFFFFF; + case ELF::R_LARCH_SUB32: + return (LocData - (S + Addend)) & 0xFFFFFFFF; + case ELF::R_LARCH_ADD64: + return (LocData + (S + Addend)); + case ELF::R_LARCH_SUB64: + return (LocData - (S + Addend)); + default: + llvm_unreachable("Invalid relocation type"); + } +} + static bool supportsCOFFX86(uint64_t Type) { switch (Type) { case COFF::IMAGE_REL_I386_SECREL: @@ -711,6 +763,8 @@ getRelocationResolver(const ObjectFile &Obj) { case Triple::bpfel: case Triple::bpfeb: return {supportsBPF, resolveBPF}; + case Triple::loongarch64: + return {supportsLoongArch, resolveLoongArch}; case Triple::mips64el: case Triple::mips64: return {supportsMips64, resolveMips64}; @@ -747,6 +801,8 @@ getRelocationResolver(const ObjectFile &Obj) { return {supportsAVR, resolveAVR}; case Triple::lanai: return {supportsLanai, resolveLanai}; + case Triple::loongarch32: + return {supportsLoongArch, resolveLoongArch}; case Triple::mipsel: case Triple::mips: return {supportsMips32, resolveMips32}; diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 9ad2c41351672862baf2966e307f895a68df7c28..90e3c62ea591e6e1a25f0f98b42d5f0ae72c75cd 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -519,12 +519,11 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_AVR_LINKRELAX_PREPARED); break; case ELF::EM_LOONGARCH: - BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32S, EF_LOONGARCH_BASE_ABI_MASK); - BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32F, EF_LOONGARCH_BASE_ABI_MASK); - BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32D, EF_LOONGARCH_BASE_ABI_MASK); - BCaseMask(EF_LOONGARCH_BASE_ABI_LP64S, EF_LOONGARCH_BASE_ABI_MASK); - BCaseMask(EF_LOONGARCH_BASE_ABI_LP64F, EF_LOONGARCH_BASE_ABI_MASK); - BCaseMask(EF_LOONGARCH_BASE_ABI_LP64D, EF_LOONGARCH_BASE_ABI_MASK); + BCaseMask(EF_LOONGARCH_ABI_SOFT_FLOAT, EF_LOONGARCH_ABI_MODIFIER_MASK); + 
BCaseMask(EF_LOONGARCH_ABI_SINGLE_FLOAT, EF_LOONGARCH_ABI_MODIFIER_MASK); + BCaseMask(EF_LOONGARCH_ABI_DOUBLE_FLOAT, EF_LOONGARCH_ABI_MODIFIER_MASK); + BCaseMask(EF_LOONGARCH_OBJABI_V0, EF_LOONGARCH_OBJABI_MASK); + BCaseMask(EF_LOONGARCH_OBJABI_V1, EF_LOONGARCH_OBJABI_MASK); break; case ELF::EM_RISCV: BCase(EF_RISCV_RVC); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 806cbc884cc5d880720efd5b55d8162f05835d6e..09f838b96495a732dbef7f58840b072f62c69eaf 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -187,6 +187,7 @@ add_llvm_component_library(LLVMSupport LineIterator.cpp Locale.cpp LockFileManager.cpp + LoongArchTargetParser.cpp LowLevelType.cpp ManagedStatic.cpp MathExtras.cpp diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 94a1536f4690094a30da010711808752846e1312..39b7bdb7eeac27ffeb22db36b1a0b966b3e958bc 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1400,6 +1400,20 @@ StringRef sys::getHostCPUName() { return "generic"; } } +#elif defined(__loongarch__) +StringRef sys::getHostCPUName() { + // Use processor id to detect cpu name. + uint32_t processor_id; + __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); + switch (processor_id & 0xff00) { + case 0xc000: // Loongson 64bit, 4-issue + return "la464"; + // TODO: Others. 
+ default: + break; + } + return "generic"; +} #elif defined(__riscv) StringRef sys::getHostCPUName() { #if defined(__linux__) @@ -1907,6 +1921,23 @@ bool sys::getHostCPUFeatures(StringMap &Features) { return true; } +#elif defined(__linux__) && defined(__loongarch__) +#include +bool sys::getHostCPUFeatures(StringMap &Features) { + unsigned long hwcap = getauxval(AT_HWCAP); + bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU + uint32_t cpucfg2 = 0x2; + __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); + + Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP + Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP + + Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX + Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX + Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ + + return true; +} #else bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif diff --git a/llvm/lib/Support/LoongArchTargetParser.cpp b/llvm/lib/Support/LoongArchTargetParser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3b6a9c77c71fb78dce5c8574541fc6f545fcb69d --- /dev/null +++ b/llvm/lib/Support/LoongArchTargetParser.cpp @@ -0,0 +1,61 @@ +//====-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise LoongArch hardware features +// such as CPU/ARCH and extension names. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LoongArchTargetParser.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace llvm; +using namespace llvm::LoongArch; + +const FeatureInfo AllFeatures[] = { +#define LOONGARCH_FEATURE(NAME, KIND) {NAME, KIND}, +#include "llvm/Support/LoongArchTargetParser.def" +}; + +const ArchInfo AllArchs[] = { +#define LOONGARCH_ARCH(NAME, KIND, FEATURES) \ + {NAME, LoongArch::ArchKind::KIND, FEATURES}, +#include "llvm/Support/LoongArchTargetParser.def" +}; + +bool LoongArch::isValidArchName(StringRef Arch) { + for (const auto A : AllArchs) + if (A.Name == Arch) + return true; + return false; +} + +bool LoongArch::getArchFeatures(StringRef Arch, + std::vector &Features) { + for (const auto A : AllArchs) { + if (A.Name == Arch) { + for (const auto F : AllFeatures) + if ((A.Features & F.Kind) == F.Kind) + Features.push_back(F.Name); + return true; + } + } + return false; +} + +bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } + +void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { + for (const auto A : AllArchs) + Values.emplace_back(A.Name); +} + +StringRef LoongArch::getDefaultArch(bool Is64Bit) { + // TODO: use a real 32-bit arch name. + return Is64Bit ? 
"loongarch64" : ""; +} diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 8acfe4c47ce732745857d48280ea3094afef1187..52de2088742d1793d3edcceb63f587e17335ad80 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -265,6 +265,9 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case GNUABIN32: return "gnuabin32"; case GNUEABI: return "gnueabi"; case GNUEABIHF: return "gnueabihf"; + case GNUF32: return "gnuf32"; + case GNUF64: return "gnuf64"; + case GNUSF: return "gnusf"; case GNUX32: return "gnux32"; case GNUILP32: return "gnu_ilp32"; case Itanium: return "itanium"; @@ -610,6 +613,9 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("gnuabi64", Triple::GNUABI64) .StartsWith("gnueabihf", Triple::GNUEABIHF) .StartsWith("gnueabi", Triple::GNUEABI) + .StartsWith("gnuf32", Triple::GNUF32) + .StartsWith("gnuf64", Triple::GNUF64) + .StartsWith("gnusf", Triple::GNUSF) .StartsWith("gnux32", Triple::GNUX32) .StartsWith("gnu_ilp32", Triple::GNUILP32) .StartsWith("code16", Triple::CODE16) diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 9793c7bc3532c1ff6bd206450de1424ddb2792fa..1e686c2443c2263a38b3259ae932cceba4c7dd1b 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCExpr.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "MCTargetDesc/LoongArchMatInt.h" #include "TargetInfo/LoongArchTargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -17,6 +20,7 @@ #include 
"llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" @@ -27,6 +31,16 @@ using namespace llvm; namespace { class LoongArchAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } + bool is64Bit() const { return getSTI().hasFeature(LoongArch::Feature64Bit); } + + struct Inst { + unsigned Opc; + LoongArchMCExpr::VariantKind VK; + Inst(unsigned Opc, + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None) + : Opc(Opc), VK(VK) {} + }; + using InstSeq = SmallVector; /// Parse a register as used in CFI directives. bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; @@ -62,19 +76,67 @@ class LoongArchAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseRegister(OperandVector &Operands); OperandMatchResultTy parseImmediate(OperandVector &Operands); + OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands); + OperandMatchResultTy parseSImm26Operand(OperandVector &Operands); + OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + // Helper to emit the sequence of instructions generated by the + // "emitLoadAddress*" functions. + void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, + const MCExpr *Symbol, SmallVectorImpl &Insts, + SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.abs $rd, sym". + void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.pcrel $rd, sym". + void emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "la.pcrel $rd, $rj, sym". + void emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.got $rd, sym". 
+ void emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "la.got $rd, $rj, sym". + void emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.tls.le $rd, sym". + void emitLoadAddressTLSLE(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.tls.ie $rd, sym". + void emitLoadAddressTLSIE(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "la.tls.ie $rd, $rj, sym". + void emitLoadAddressTLSIELarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.tls.ld $rd, sym". + void emitLoadAddressTLSLD(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "la.tls.ld $rd, $rj, sym". + void emitLoadAddressTLSLDLarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "la.tls.gd $rd, sym". + void emitLoadAddressTLSGD(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "la.tls.gd $rd, $rj, sym". + void emitLoadAddressTLSGDLarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + + // Helper to emit pseudo instruction "li.w/d $rd, $imm". 
+ void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + public: enum LoongArchMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, Match_RequiresMsbNotLessThanLsb, Match_RequiresOpnd2NotR0R1, + Match_RequiresAMORdDifferRkRj, + Match_RequiresLAORdDifferRj, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "LoongArchGenAsmMatcher.inc" #undef GET_OPERAND_DIAGNOSTIC_TYPES }; + static bool classifySymbolRef(const MCExpr *Expr, + LoongArchMCExpr::VariantKind &Kind); + LoongArchAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI, MII) { @@ -119,8 +181,19 @@ public: bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } void setReg(MCRegister PhysReg) { Reg.RegNum = PhysReg; } + bool isGPR() const { + return Kind == KindTy::Register && + LoongArchMCRegisterClasses[LoongArch::GPRRegClassID].contains( + Reg.RegNum); + } + + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, + LoongArchMCExpr::VariantKind &VK) { + if (auto *LE = dyn_cast(Expr)) { + VK = LE->getKind(); + return false; + } - static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) { if (auto CE = dyn_cast(Expr)) { Imm = CE->getValue(); return true; @@ -134,8 +207,10 @@ public: return false; int64_t Imm; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - return IsConstantImm && isUInt(Imm - P); + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && isUInt(Imm - P) && + VK == LoongArchMCExpr::VK_LoongArch_None; } template bool isSImm() const { @@ -143,8 +218,20 @@ public: return false; int64_t Imm; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - return IsConstantImm && isShiftedInt(Imm); + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = 
evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && isShiftedInt(Imm) && + VK == LoongArchMCExpr::VK_LoongArch_None; + } + + bool isBareSymbol() const { + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + // Must be of 'immediate' type but not a constant. + if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) + return false; + return LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + VK == LoongArchMCExpr::VK_LoongArch_None; } bool isUImm2() const { return isUImm<2>(); } @@ -153,16 +240,187 @@ public: bool isUImm5() const { return isUImm<5>(); } bool isUImm6() const { return isUImm<6>(); } bool isUImm8() const { return isUImm<8>(); } + bool isSImm12() const { return isSImm<12>(); } + + bool isSImm12addlike() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12; + return IsConstantImm + ? 
isInt<12>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isSImm12lu52id() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_ABS64_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_PCALA64_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_GOT64_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_HI12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12; + return IsConstantImm + ? isInt<12>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isUImm12() const { return isUImm<12>(); } + + bool isUImm12ori() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_ABS_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_GOT_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_LO12 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12; + return IsConstantImm + ? 
isUInt<12>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isUImm14() const { return isUImm<14>(); } bool isUImm15() const { return isUImm<15>(); } - bool isSImm12() const { return isSImm<12>(); } + bool isSImm14lsl2() const { return isSImm<14, 2>(); } bool isSImm16() const { return isSImm<16>(); } - bool isSImm16lsl2() const { return isSImm<16, 2>(); } + + bool isSImm16lsl2() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_B16 || + VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12; + return IsConstantImm + ? isShiftedInt<16, 2>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isSImm20() const { return isSImm<20>(); } - bool isSImm21lsl2() const { return isSImm<21, 2>(); } - bool isSImm26lsl2() const { return isSImm<26, 2>(); } + + bool isSImm20pcalau12i() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_PCALA_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; + return IsConstantImm + ? 
isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isSImm20lu12iw() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_ABS_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_GOT_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_HI20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20; + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isSImm20lu32id() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_ABS64_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_PCALA64_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_GOT64_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20 || + VK == LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20; + + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isSImm21lsl2() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_B21; + return IsConstantImm + ? 
isShiftedInt<21, 2>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isSImm26Operand() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_CALL || + VK == LoongArchMCExpr::VK_LoongArch_CALL_PLT || + VK == LoongArchMCExpr::VK_LoongArch_B26; + return IsConstantImm + ? isShiftedInt<26, 2>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + + bool isImm32() const { return isSImm<32>() || isUImm<32>(); } /// Gets location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } @@ -185,7 +443,7 @@ public: } void print(raw_ostream &OS) const override { - auto RegName = [](unsigned Reg) { + auto RegName = [](MCRegister Reg) { if (Reg) return LoongArchInstPrinter::getRegisterName(Reg); else @@ -289,6 +547,21 @@ OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, llvm_unreachable("Unimplemented function."); } +bool LoongArchAsmParser::classifySymbolRef(const MCExpr *Expr, + LoongArchMCExpr::VariantKind &Kind) { + Kind = LoongArchMCExpr::VK_LoongArch_None; + + if (const LoongArchMCExpr *RE = dyn_cast(Expr)) { + Kind = RE->getKind(); + Expr = RE->getSubExpr(); + } + + MCValue Res; + if (Expr->evaluateAsRelocatable(Res, nullptr, nullptr)) + return Res.getRefKind() == LoongArchMCExpr::VK_LoongArch_None; + return false; +} + OperandMatchResultTy LoongArchAsmParser::parseRegister(OperandVector &Operands) { if (getLexer().getTok().isNot(AsmToken::Dollar)) @@ -319,17 +592,130 @@ LoongArchAsmParser::parseImmediate(OperandVector &Operands) { SMLoc E; const MCExpr *Res; - if (getParser().parseExpression(Res, E)) + switch (getLexer().getKind()) { + default: + return 
MatchOperand_NoMatch; + case AsmToken::LParen: + case AsmToken::Dot: + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::Exclaim: + case AsmToken::Tilde: + case AsmToken::Integer: + case AsmToken::String: + case AsmToken::Identifier: + if (getParser().parseExpression(Res, E)) + return MatchOperand_ParseFail; + break; + case AsmToken::Percent: + return parseOperandWithModifier(Operands); + } + + Operands.push_back(LoongArchOperand::createImm(Res, S, E)); + return MatchOperand_Success; +} + +OperandMatchResultTy +LoongArchAsmParser::parseOperandWithModifier(OperandVector &Operands) { + SMLoc S = getLoc(); + SMLoc E; + + if (getLexer().getKind() != AsmToken::Percent) { + Error(getLoc(), "expected '%' for operand modifier"); return MatchOperand_ParseFail; + } + + getParser().Lex(); // Eat '%' + if (getLexer().getKind() != AsmToken::Identifier) { + Error(getLoc(), "expected valid identifier for operand modifier"); + return MatchOperand_ParseFail; + } + StringRef Identifier = getParser().getTok().getIdentifier(); + LoongArchMCExpr::VariantKind VK = + LoongArchMCExpr::getVariantKindForName(Identifier); + if (VK == LoongArchMCExpr::VK_LoongArch_Invalid) { + Error(getLoc(), "unrecognized operand modifier"); + return MatchOperand_ParseFail; + } + + getParser().Lex(); // Eat the identifier + if (getLexer().getKind() != AsmToken::LParen) { + Error(getLoc(), "expected '('"); + return MatchOperand_ParseFail; + } + getParser().Lex(); // Eat '(' + + const MCExpr *SubExpr; + if (getParser().parseParenExpression(SubExpr, E)) { + return MatchOperand_ParseFail; + } + + const MCExpr *ModExpr = LoongArchMCExpr::create(SubExpr, VK, getContext()); + Operands.push_back(LoongArchOperand::createImm(ModExpr, S, E)); + return MatchOperand_Success; +} + +OperandMatchResultTy +LoongArchAsmParser::parseSImm26Operand(OperandVector &Operands) { + SMLoc S = getLoc(); + const MCExpr *Res; + + if (getLexer().getKind() == AsmToken::Percent) + return parseOperandWithModifier(Operands); + + 
if (getLexer().getKind() != AsmToken::Identifier) + return MatchOperand_NoMatch; + + StringRef Identifier; + if (getParser().parseIdentifier(Identifier)) + return MatchOperand_ParseFail; + + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); + + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + Res = LoongArchMCExpr::create(Res, LoongArchMCExpr::VK_LoongArch_CALL, + getContext()); Operands.push_back(LoongArchOperand::createImm(Res, S, E)); return MatchOperand_Success; } +OperandMatchResultTy +LoongArchAsmParser::parseAtomicMemOp(OperandVector &Operands) { + // Parse "$r*". + if (parseRegister(Operands) != MatchOperand_Success) + return MatchOperand_NoMatch; + + // If there is a next operand and it is 0, ignore it. Otherwise print a + // diagnostic message. + if (getLexer().is(AsmToken::Comma)) { + getLexer().Lex(); // Consume comma token. + int64_t ImmVal; + SMLoc ImmStart = getLoc(); + if (getParser().parseIntToken(ImmVal, "expected optional integer offset")) + return MatchOperand_ParseFail; + if (ImmVal) { + Error(ImmStart, "optional integer offset must be 0"); + return MatchOperand_ParseFail; + } + } + + return MatchOperand_Success; +} /// Looks at a token type and creates the relevant operand from this /// information, adding to Operands. Return true upon an error. bool LoongArchAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. 
+ OperandMatchResultTy Result = + MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true); + if (Result == MatchOperand_Success) + return false; + if (Result == MatchOperand_ParseFail) + return true; + if (parseRegister(Operands) == MatchOperand_Success || parseImmediate(Operands) == MatchOperand_Success) return false; @@ -367,18 +753,445 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, return Error(Loc, "unexpected token"); } +void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, + const MCExpr *Symbol, + SmallVectorImpl &Insts, + SMLoc IDLoc, MCStreamer &Out) { + MCContext &Ctx = getContext(); + for (LoongArchAsmParser::Inst &Inst : Insts) { + unsigned Opc = Inst.Opc; + LoongArchMCExpr::VariantKind VK = Inst.VK; + const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx); + switch (Opc) { + default: + llvm_unreachable("unexpected opcode"); + case LoongArch::PCALAU12I: + case LoongArch::LU12I_W: + Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addExpr(LE), + getSTI()); + break; + case LoongArch::ORI: + case LoongArch::ADDI_W: + case LoongArch::LD_W: + case LoongArch::LD_D: { + if (VK == LoongArchMCExpr::VK_LoongArch_None) { + Out.emitInstruction( + MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addImm(0), + getSTI()); + continue; + } + Out.emitInstruction( + MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addExpr(LE), + getSTI()); + break; + } + case LoongArch::LU32I_D: + Out.emitInstruction(MCInstBuilder(Opc) + .addReg(DestReg == TmpReg ? DestReg : TmpReg) + .addReg(DestReg == TmpReg ? DestReg : TmpReg) + .addExpr(LE), + getSTI()); + break; + case LoongArch::LU52I_D: + Out.emitInstruction( + MCInstBuilder(Opc).addReg(TmpReg).addReg(TmpReg).addExpr(LE), + getSTI()); + break; + case LoongArch::ADDI_D: + Out.emitInstruction( + MCInstBuilder(Opc) + .addReg(TmpReg) + .addReg(DestReg == TmpReg ? 
TmpReg : LoongArch::R0) + .addExpr(LE), + getSTI()); + break; + case LoongArch::ADD_D: + case LoongArch::LDX_D: + Out.emitInstruction( + MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addReg(TmpReg), + getSTI()); + break; + } + } +} + +void LoongArchAsmParser::emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.abs $rd, sym + // expands to: + // lu12i.w $rd, %abs_hi20(sym) + // ori $rd, $rd, %abs_lo12(sym) + // + // for 64bit appends: + // lu32i.d $rd, %abs64_lo20(sym) + // lu52i.d $rd, $rd, %abs64_hi12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOpcode() == LoongArch::PseudoLA_ABS + ? Inst.getOperand(1).getExpr() + : Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU12I_W, LoongArchMCExpr::VK_LoongArch_ABS_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ORI, LoongArchMCExpr::VK_LoongArch_ABS_LO12)); + + if (is64Bit()) { + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_ABS64_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_ABS64_HI12)); + } + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.pcrel $rd, sym + // expands to: + // pcalau12i $rd, %pc_hi20(sym) + // addi.w/d $rd, rd, %pc_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + unsigned ADDI = is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_PCALA_HI20)); + Insts.push_back( + LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.pcrel $rd, $rj, sym + // expands to: + // pcalau12i $rd, %pc_hi20(sym) + // addi.d $rj, $r0, %pc_lo12(sym) + // lu32i.d $rj, %pc64_lo20(sym) + // lu52i.d $rj, $rj, %pc64_hi12(sym) + // add.d $rd, $rd, $rj + MCRegister DestReg = Inst.getOperand(0).getReg(); + MCRegister TmpReg = Inst.getOperand(1).getReg(); + const MCExpr *Symbol = Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_PCALA_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_PCALA_LO12)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_PCALA64_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_PCALA64_HI12)); + Insts.push_back(LoongArchAsmParser::Inst(LoongArch::ADD_D)); + + emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.got $rd, sym + // expands to: + // pcalau12i $rd, %got_pc_hi20(sym) + // ld.w/d $rd, $rd, %got_pc_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + unsigned LD = is64Bit() ? 
LoongArch::LD_D : LoongArch::LD_W; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20)); + Insts.push_back( + LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.got $rd, $rj, sym + // expands to: + // pcalau12i $rd, %got_pc_hi20(sym) + // addi.d $rj, $r0, %got_pc_lo12(sym) + // lu32i.d $rj, %got64_pc_lo20(sym) + // lu52i.d $rj, $rj, %got64_pc_hi12(sym) + // ldx.d $rd, $rd, $rj + MCRegister DestReg = Inst.getOperand(0).getReg(); + MCRegister TmpReg = Inst.getOperand(1).getReg(); + const MCExpr *Symbol = Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12)); + Insts.push_back(LoongArchAsmParser::Inst(LoongArch::LDX_D)); + + emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSLE(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.le $rd, sym + // expands to: + // lu12i.w $rd, %le_hi20(sym) + // ori $rd, $rd, %le_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU12I_W, LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ORI, LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, 
Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSIE(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.ie $rd, sym + // expands to: + // pcalau12i $rd, %ie_pc_hi20(sym) + // ld.w/d $rd, $rd, %ie_pc_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + unsigned LD = is64Bit() ? LoongArch::LD_D : LoongArch::LD_W; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LD, LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSIELarge(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.ie $rd, $rj, sym + // expands to: + // pcalau12i $rd, %ie_pc_hi20(sym) + // addi.d $rj, $r0, %ie_pc_lo12(sym) + // lu32i.d $rj, %ie64_pc_lo20(sym) + // lu52i.d $rj, $rj, %ie64_pc_hi12(sym) + // ldx.d $rd, $rd, $rj + MCRegister DestReg = Inst.getOperand(0).getReg(); + MCRegister TmpReg = Inst.getOperand(1).getReg(); + const MCExpr *Symbol = Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12)); + Insts.push_back(LoongArchAsmParser::Inst(LoongArch::LDX_D)); + + emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSLD(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.ld $rd, sym + // expands to: + // pcalau12i $rd, %ld_pc_hi20(sym) + // addi.w/d $rd, $rd, 
%got_pc_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + unsigned ADDI = is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + ADDI, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSLDLarge(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.ld $rd, $rj, sym + // expands to: + // pcalau12i $rd, %ld_pc_hi20(sym) + // addi.d $rj, $r0, %got_pc_lo12(sym) + // lu32i.d $rj, %got64_pc_lo20(sym) + // lu52i.d $rj, $rj, %got64_pc_hi12(sym) + // add.d $rd, $rd, $rj + MCRegister DestReg = Inst.getOperand(0).getReg(); + MCRegister TmpReg = Inst.getOperand(1).getReg(); + const MCExpr *Symbol = Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12)); + Insts.push_back(LoongArchAsmParser::Inst(LoongArch::ADD_D)); + + emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSGD(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.gd $rd, sym + // expands to: + // pcalau12i $rd, %gd_pc_hi20(sym) + // addi.w/d $rd, $rd, %got_pc_lo12(sym) + MCRegister DestReg = Inst.getOperand(0).getReg(); + const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + InstSeq Insts; + unsigned ADDI = is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + ADDI, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + + emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadAddressTLSGDLarge(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + // la.tls.gd $rd, $rj, sym + // expands to: + // pcalau12i $rd, %gd_pc_hi20(sym) + // addi.d $rj, $r0, %got_pc_lo12(sym) + // lu32i.d $rj, %got64_pc_lo20(sym) + // lu52i.d $rj, $rj, %got64_pc_hi12(sym) + // add.d $rd, $rd, $rj + MCRegister DestReg = Inst.getOperand(0).getReg(); + MCRegister TmpReg = Inst.getOperand(1).getReg(); + const MCExpr *Symbol = Inst.getOperand(2).getExpr(); + InstSeq Insts; + + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20)); + Insts.push_back(LoongArchAsmParser::Inst( + LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12)); + Insts.push_back(LoongArchAsmParser::Inst(LoongArch::ADD_D)); + + emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out); +} + +void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out) { + MCRegister DestReg = Inst.getOperand(0).getReg(); + int64_t Imm = Inst.getOperand(1).getImm(); + MCRegister SrcReg = LoongArch::R0; + + if (Inst.getOpcode() == LoongArch::PseudoLI_W) + Imm = SignExtend64<32>(Imm); + + for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { + unsigned Opc = Inst.Opc; + if (Opc == LoongArch::LU12I_W) + Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addImm(Inst.Imm), + getSTI()); + else + Out.emitInstruction( + 
MCInstBuilder(Opc).addReg(DestReg).addReg(SrcReg).addImm(Inst.Imm), + getSTI()); + SrcReg = DestReg; + } +} + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out) { Inst.setLoc(IDLoc); + switch (Inst.getOpcode()) { + default: + break; + case LoongArch::PseudoLA_ABS: + case LoongArch::PseudoLA_ABS_LARGE: + emitLoadAddressAbs(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_PCREL: + emitLoadAddressPcrel(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_PCREL_LARGE: + emitLoadAddressPcrelLarge(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_GOT: + emitLoadAddressGot(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_GOT_LARGE: + emitLoadAddressGotLarge(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_LE: + emitLoadAddressTLSLE(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_IE: + emitLoadAddressTLSIE(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_IE_LARGE: + emitLoadAddressTLSIELarge(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_LD: + emitLoadAddressTLSLD(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_LD_LARGE: + emitLoadAddressTLSLDLarge(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_GD: + emitLoadAddressTLSGD(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLA_TLS_GD_LARGE: + emitLoadAddressTLSGDLarge(Inst, IDLoc, Out); + return false; + case LoongArch::PseudoLI_W: + case LoongArch::PseudoLI_D: + emitLoadImm(Inst, IDLoc, Out); + return false; + } Out.emitInstruction(Inst, getSTI()); return false; } unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { - switch (Inst.getOpcode()) { + unsigned Opc = Inst.getOpcode(); + switch (Opc) { default: + if (Opc >= LoongArch::AMADD_D && Opc <= LoongArch::AMXOR_W) { + unsigned Rd = Inst.getOperand(0).getReg(); + unsigned Rk = Inst.getOperand(1).getReg(); + unsigned Rj = 
Inst.getOperand(2).getReg(); + if ((Rd == Rk || Rd == Rj) && Rd != LoongArch::R0) + return Match_RequiresAMORdDifferRkRj; + } break; + case LoongArch::PseudoLA_PCREL_LARGE: + case LoongArch::PseudoLA_GOT_LARGE: + case LoongArch::PseudoLA_TLS_IE_LARGE: + case LoongArch::PseudoLA_TLS_LD_LARGE: + case LoongArch::PseudoLA_TLS_GD_LARGE: { + unsigned Rd = Inst.getOperand(0).getReg(); + unsigned Rj = Inst.getOperand(1).getReg(); + if (Rd == Rj) + return Match_RequiresLAORdDifferRj; + break; + } case LoongArch::CSRXCHG: { unsigned Rj = Inst.getOperand(2).getReg(); if (Rj == LoongArch::R0 || Rj == LoongArch::R1) @@ -500,6 +1313,11 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_RequiresOpnd2NotR0R1: return Error(Operands[2]->getStartLoc(), "must not be $r0 or $r1"); + case Match_RequiresAMORdDifferRkRj: + return Error(Operands[1]->getStartLoc(), + "$rd must be different from both $rk and $rj"); + case Match_RequiresLAORdDifferRj: + return Error(Operands[1]->getStartLoc(), "$rd must be different from $rj"); case Match_InvalidUImm2: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 2) - 1); @@ -518,12 +1336,30 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidUImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 12) - 1); + case Match_InvalidUImm12ori: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 12) - 1, + "operand must be a symbol with modifier (e.g. 
%abs_lo12) or an " + "integer in the range"); case Match_InvalidUImm15: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 15) - 1); case Match_InvalidSImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 1); + case Match_InvalidSImm12addlike: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 11), + /*Upper=*/(1 << 11) - 1, + "operand must be a symbol with modifier (e.g. %pc_lo12) or an integer " + "in the range"); + case Match_InvalidSImm12lu52id: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 11), + /*Upper=*/(1 << 11) - 1, + "operand must be a symbol with modifier (e.g. %pc64_hi12) or an " + "integer in the range"); case Match_InvalidSImm14lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4, @@ -534,18 +1370,47 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSImm16lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 17), /*Upper=*/(1 << 17) - 4, - "immediate must be a multiple of 4 in the range"); + "operand must be a symbol with modifier (e.g. %b16) or an integer " + "in the range"); case Match_InvalidSImm20: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 19), /*Upper=*/(1 << 19) - 1); + case Match_InvalidSImm20lu12iw: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. %abs_hi20) or an integer " + "in the range"); + case Match_InvalidSImm20lu32id: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. 
%abs64_lo20) or an " + "integer in the range"); + case Match_InvalidSImm20pcalau12i: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " + "in the range"); case Match_InvalidSImm21lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, - "immediate must be a multiple of 4 in the range"); - case Match_InvalidSImm26lsl2: + "operand must be a symbol with modifier (e.g. %b21) or an integer " + "in the range"); + case Match_InvalidSImm26Operand: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 27), /*Upper=*/(1 << 27) - 4, - "immediate must be a multiple of 4 in the range"); + "operand must be a bare symbol name or an immediate must be a multiple " + "of 4 in the range"); + case Match_InvalidImm32: { + SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operand must be a 32 bit immediate"); + } + case Match_InvalidBareSymbol: { + SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operand must be a bare symbol name"); + } } llvm_unreachable("Unknown match type detected!"); } diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 4d8e81aea4200d5c7c654378750379fbea8404d5..41a38f9eb0c5f94c32b6d554497b782ea665d6d7 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -16,6 +16,8 @@ add_public_tablegen_target(LoongArchCommonTableGen) add_llvm_target(LoongArchCodeGen LoongArchAsmPrinter.cpp + LoongArchExpandAtomicPseudoInsts.cpp + LoongArchExpandPseudoInsts.cpp LoongArchFrameLowering.cpp LoongArchInstrInfo.cpp LoongArchISelDAGToDAG.cpp @@ -36,7 +38,6 @@ add_llvm_target(LoongArchCodeGen SelectionDAG Support Target - GlobalISel ADD_TO_COMPONENT LoongArch diff --git 
a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index beb757c78596926a78b1e7c7019ce5d6142dc9b8..2335152e5ab17015e8e781ace14958aaf9bb4150 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -114,9 +114,9 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const MCDisassembler *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); - // Sign-extend the number in the bottom bits of Imm, then shift left + // Shift left Imm bits, then sign-extend the number in the bottom // bits. - Inst.addOperand(MCOperand::createImm(SignExtend64(Imm) << S)); + Inst.addOperand(MCOperand::createImm(SignExtend64(Imm << S))); return MCDisassembler::Success; } diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index e6c9c24dd1b2942f4c6cc8f1481669cf9d47bee8..a43c5f111e61a48a730eac151e020e4305e39e32 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -18,13 +18,14 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { -class LoongArchTargetMachine; class AsmPrinter; class FunctionPass; +class LoongArchTargetMachine; class MCInst; class MCOperand; class MachineInstr; class MachineOperand; +class PassRegistry; bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); @@ -32,7 +33,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP); +FunctionPass *createLoongArchExpandAtomicPseudoPass(); FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); +FunctionPass *createLoongArchPreRAExpandPseudoPass(); +void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &); +void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &); } // end namespace llvm #endif // 
LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index bf465c27ef99c8c94eaf2ad71670f0dc969faa30..3e9e8b2519f3131320acce0995b2edd52e62202c 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -17,6 +17,9 @@ include "llvm/Target/Target.td" def Feature64Bit : SubtargetFeature<"64bit", "HasLA64", "true", "LA64 Basic Integer and Privilege Instruction Set">; +def Feature32Bit + : SubtargetFeature<"32bit", "HasLA32", "true", + "LA32 Basic Integer and Privilege Instruction Set">; def IsLA64 : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<(all_of Feature64Bit), @@ -85,6 +88,38 @@ def HasExtLBT AssemblerPredicate<(all_of FeatureExtLBT), "'LBT' (Loongson Binary Translation Extension)">; +// Expand la.global as la.pcrel +def LaGlobalWithPcrel + : SubtargetFeature<"la-global-with-pcrel", "HasLaGlobalWithPcrel", "true", + "Expand la.global as la.pcrel">; +def HasLaGlobalWithPcrel + : Predicate<"Subtarget->hasLaGlobalWithPcrel()">, + AssemblerPredicate<(all_of LaGlobalWithPcrel), + "Expand la.global as la.pcrel">; + +// Expand la.global as la.abs +def LaGlobalWithAbs + : SubtargetFeature<"la-global-with-abs", "HasLaGlobalWithAbs", "true", + "Expand la.global as la.abs">; +def HasLaGlobalWithAbs + : Predicate<"Subtarget->hasLaGlobalWithAbs()">, + AssemblerPredicate<(all_of LaGlobalWithAbs), + "Expand la.global as la.abs">; + +// Expand la.local as la.abs +def LaLocalWithAbs + : SubtargetFeature<"la-local-with-abs", "HasLaLocalWithAbs", "true", + "Expand la.local as la.abs">; +def HasLaLocalWithAbs + : Predicate<"Subtarget->hasLaLocalWithAbs()">, + AssemblerPredicate<(all_of LaLocalWithAbs), + "Expand la.local as la.abs">; + +// Unaligned memory access +def FeatureUAL + : SubtargetFeature<"ual", "HasUAL", "true", + "Allow memory accesses to be unaligned">; + //===----------------------------------------------------------------------===// // 
Registers, instruction descriptions ... //===----------------------------------------------------------------------===// @@ -97,10 +132,20 @@ include "LoongArchInstrInfo.td" // LoongArch processors supported. //===----------------------------------------------------------------------===// -def : ProcessorModel<"generic-la32", NoSchedModel, []>; -def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; +def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; +def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; + +// Generic 64-bit processor with double-precision floating-point support. +def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, + FeatureUAL, + FeatureBasicD]>; + +// Support generic for compatibility with other targets. The triple will be used +// to change to the appropriate la32/la64 version. +def : ProcessorModel<"generic", NoSchedModel, []>; def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, + FeatureUAL, FeatureExtLASX, FeatureExtLVZ, FeatureExtLBT]>; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 1467d1757ff0cbad8e15e4c392191d88b2504e6f..04fdd41d677308eac2f4fc7b741f3125f3dfe329 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -14,6 +14,7 @@ #include "LoongArchAsmPrinter.h" #include "LoongArch.h" #include "LoongArchTargetMachine.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" #include "TargetInfo/LoongArchTargetInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/TargetRegistry.h" @@ -34,11 +35,103 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; + switch (MI->getOpcode()) { + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + } + MCInst TmpInst; if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, 
*this)) EmitToStreamer(*OutStreamer, TmpInst); } +bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS)) + return false; + + const MachineOperand &MO = MI->getOperand(OpNo); + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'z': // Print $zero register if zero, regular printing otherwise. + if (MO.isImm() && MO.getImm() == 0) { + OS << '$' << LoongArchInstPrinter::getRegisterName(LoongArch::R0); + return false; + } + break; + // TODO: handle other extra codes if any. + } + } + + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + OS << MO.getImm(); + return false; + case MachineOperand::MO_Register: + OS << '$' << LoongArchInstPrinter::getRegisterName(MO.getReg()); + return false; + case MachineOperand::MO_GlobalAddress: + PrintSymbolOperand(MO, OS); + return false; + default: + llvm_unreachable("not implemented"); + } + + return true; +} + +bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + // TODO: handle extra code. + if (ExtraCode) + return true; + + // We only support memory operands like "Base + Offset", where base must be a + // register, and offset can be a register or an immediate value. + const MachineOperand &BaseMO = MI->getOperand(OpNo); + // Base address must be a register. + if (!BaseMO.isReg()) + return true; + // Print the base address register. + OS << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()); + // Print the offset operand. 
+ const MachineOperand &OffsetMO = MI->getOperand(OpNo + 1); + if (OffsetMO.isReg()) + OS << ", $" << LoongArchInstPrinter::getRegisterName(OffsetMO.getReg()); + else if (OffsetMO.isImm()) + OS << ", " << OffsetMO.getImm(); + else + return true; + + return false; +} + +void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( + const MachineInstr &MI) { + const Function &F = MF->getFunction(); + if (F.hasFnAttribute("patchable-function-entry")) { + unsigned Num; + if (F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, Num)) + return; + emitNops(Num); + return; + } + + // TODO: Emit sled here once we get support for XRay. +} + bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AsmPrinter::runOnMachineFunction(MF); return true; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h index b51c19188051958c654e9f3e4634f3539b81fc82..c8bf657f8de7c3c19f1bc7069854c3f86d419e09 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -36,6 +36,13 @@ public: void emitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + + void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + // tblgen'erated function. 
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp new file mode 100644 index 0000000000000000000000000000000000000000..51df0463e23524850cea69550a4822bdadebcc94 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -0,0 +1,628 @@ +//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands atomic pseudo instructions into +// target instructions. This pass should be run at the last possible moment, +// avoiding the possibility for other passes to break the requirements for +// forward progress in the LL/SC block. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchTargetMachine.h" + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME \ + "LoongArch atomic pseudo instruction expansion pass" + +namespace { + +class LoongArchExpandAtomicPseudo : public MachineFunctionPass { +public: + const LoongArchInstrInfo *TII; + static char ID; + + LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) { + initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicBinOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp, + bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicMinMaxOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpXchg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, bool IsMasked, + int Width, MachineBasicBlock::iterator &NextMBBI); +}; + +char LoongArchExpandAtomicPseudo::ID = 0; + +bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = + static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo()); + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + +bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = 
MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool LoongArchExpandAtomicPseudo::expandMI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case LoongArch::PseudoMaskedAtomicSwap32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32, + NextMBBI); + case LoongArch::PseudoAtomicSwap32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32, + NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI); + case LoongArch::PseudoAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadNand64: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64, + NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadAnd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32, + NextMBBI); + case LoongArch::PseudoAtomicLoadOr32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI); + case LoongArch::PseudoAtomicLoadXor32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32, + NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadUMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32, + NextMBBI); + 
case LoongArch::PseudoMaskedAtomicLoadUMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32, + NextMBBI); + case LoongArch::PseudoCmpXchg32: + return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI); + case LoongArch::PseudoCmpXchg64: + return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI); + case LoongArch::PseudoMaskedCmpXchg32: + return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32, + NextMBBI); + case LoongArch::PseudoMaskedAtomicLoadMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32, + NextMBBI); + } + return false; +} + +static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, + MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, + MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, + AtomicRMWInst::BinOp BinOp, int Width) { + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); + + // .loop: + // if(Ordering != AtomicOrdering::Monotonic) + // dbar 0 + // ll.[w|d] dest, (addr) + // binop scratch, dest, val + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loop + if (Ordering != AtomicOrdering::Monotonic) + BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? 
LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) + .addImm(0); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(IncrReg) + .addReg(LoongArch::R0); + break; + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg) + .addReg(ScratchReg) + .addReg(LoongArch::R0); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::And: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Or: + BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Xor: + BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + } + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? 
LoongArch::SC_W : LoongArch::SC_D), ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(ScratchReg) + .addMBB(LoopMBB); +} + +static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL, + MachineBasicBlock *MBB, Register DestReg, + Register OldValReg, Register NewValReg, + Register MaskReg, Register ScratchReg) { + assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique"); + assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique"); + assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique"); + + // res = oldval ^ ((oldval ^ newval) & masktargetdata); + BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg) + .addReg(OldValReg) + .addReg(NewValReg); + BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(ScratchReg) + .addReg(MaskReg); + BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg) + .addReg(OldValReg) + .addReg(ScratchReg); +} + +static void doMaskedAtomicBinOpExpansion( + const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { + assert(Width == 32 && "Should never need to expand masked 64-bit operations"); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(5).getImm()); + + // .loop: + // if(Ordering != AtomicOrdering::Monotonic) + // dbar 0 + // ll.w destreg, (alignedaddr) + // binop scratch, destreg, incr + // xor scratch, destreg, scratch + // and scratch, scratch, masktargetdata + // xor scratch, destreg, scratch + // sc.w scratch, scratch, (alignedaddr) + // beqz scratch, loop + if (Ordering != AtomicOrdering::Monotonic) + 
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg) + .addReg(IncrReg) + .addImm(0); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg) + .addReg(ScratchReg) + .addReg(LoongArch::R0); + // TODO: support other AtomicRMWInst. + } + + insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg, + ScratchReg); + + BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(ScratchReg) + .addMBB(LoopMBB); +} + +bool LoongArchExpandAtomicPseudo::expandAtomicBinOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + MachineFunction *MF = MBB.getParent(); + auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopMBB); + MF->insert(++LoopMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. 
+ LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopMBB); + + if (IsMasked) + doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, + Width); + else + doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + +static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL, + MachineBasicBlock *MBB, Register ValReg, + Register ShamtReg) { + BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg) + .addReg(ValReg) + .addReg(ShamtReg); + BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg) + .addReg(ValReg) + .addReg(ShamtReg); +} + +bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + assert(IsMasked == true && + "Should only need to expand masked atomic max/min"); + assert(Width == 32 && "Should never need to expand masked 64-bit operations"); + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. 
+ MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), TailMBB); + MF->insert(++TailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + LoopTailMBB->addSuccessor(TailMBB); + TailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + Register DestReg = MI.getOperand(0).getReg(); + Register Scratch1Reg = MI.getOperand(1).getReg(); + Register Scratch2Reg = MI.getOperand(2).getReg(); + Register AddrReg = MI.getOperand(3).getReg(); + Register IncrReg = MI.getOperand(4).getReg(); + Register MaskReg = MI.getOperand(5).getReg(); + + // + // .loophead: + // dbar 0 + // ll.w destreg, (alignedaddr) + // and scratch2, destreg, mask + // move scratch1, destreg + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg) + .addReg(DestReg) + .addReg(MaskReg); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg) + .addReg(DestReg) + .addReg(LoongArch::R0); + + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + // bgeu scratch2, incr, .looptail + case AtomicRMWInst::UMax: + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU)) + .addReg(Scratch2Reg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + // bgeu incr, scratch2, .looptail + case AtomicRMWInst::UMin: + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU)) + .addReg(IncrReg) + .addReg(Scratch2Reg) + .addMBB(LoopTailMBB); + break; + case 
AtomicRMWInst::Max: + insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg()); + // bge scratch2, incr, .looptail + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE)) + .addReg(Scratch2Reg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + case AtomicRMWInst::Min: + insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg()); + // bge incr, scratch2, .looptail + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE)) + .addReg(IncrReg) + .addReg(Scratch2Reg) + .addMBB(LoopTailMBB); + break; + // TODO: support other AtomicRMWInst. + } + + // .loopifbody: + // xor scratch1, destreg, incr + // and scratch1, scratch1, mask + // xor scratch1, destreg, scratch1 + insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg, + MaskReg, Scratch1Reg); + + // .looptail: + // sc.w scratch1, scratch1, (addr) + // beqz scratch1, loop + // dbar 0x700 + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) + .addReg(Scratch1Reg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(Scratch1Reg) + .addMBB(LoopHeadMBB); + + // .tail: + // dbar 0x700 + BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + +bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, + int Width, MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto TailMBB = 
MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), TailMBB); + MF->insert(++TailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopHeadMBB->addSuccessor(TailMBB); + LoopTailMBB->addSuccessor(DoneMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + TailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register CmpValReg = MI.getOperand(3).getReg(); + Register NewValReg = MI.getOperand(4).getReg(); + + if (!IsMasked) { + // .loophead: + // ll.[w|d] dest, (addr) + // bne dest, cmpval, tail + BuildMI(LoopHeadMBB, DL, + TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE)) + .addReg(DestReg) + .addReg(CmpValReg) + .addMBB(TailMBB); + // .looptail: + // dbar 0 + // move scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(NewValReg) + .addReg(LoongArch::R0); + BuildMI(LoopTailMBB, DL, + TII->get(Width == 32 ? 
LoongArch::SC_W : LoongArch::SC_D), + ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(ScratchReg) + .addMBB(LoopHeadMBB); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } else { + // .loophead: + // ll.[w|d] dest, (addr) + // and scratch, dest, mask + // bne scratch, cmpval, tail + Register MaskReg = MI.getOperand(5).getReg(); + BuildMI(LoopHeadMBB, DL, + TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE)) + .addReg(ScratchReg) + .addReg(CmpValReg) + .addMBB(TailMBB); + + // .looptail: + // dbar 0 + // andn scratch, dest, mask + // or scratch, scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(ScratchReg) + .addReg(NewValReg); + BuildMI(LoopTailMBB, DL, + TII->get(Width == 32 ? 
LoongArch::SC_W : LoongArch::SC_D), + ScratchReg) + .addReg(ScratchReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(ScratchReg) + .addMBB(LoopHeadMBB); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + + // .tail: + // dbar 0x700 + BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + +} // end namespace + +INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo", + LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false) + +namespace llvm { + +FunctionPass *createLoongArchExpandAtomicPseudoPass() { + return new LoongArchExpandAtomicPseudo(); +} + +} // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bad39dc3a14fe7ee39db7d45f2244315cc706366 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -0,0 +1,325 @@ +//===-- LoongArchExpandPseudoInsts.cpp - Expand pseudo instructions -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchTargetMachine.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/CodeGen.h" + +using namespace llvm; + +#define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \ + "LoongArch Pre-RA pseudo instruction expansion pass" + +namespace { + +class LoongArchPreRAExpandPseudo : public MachineFunctionPass { +public: + const LoongArchInstrInfo *TII; + static char ID; + + LoongArchPreRAExpandPseudo() : MachineFunctionPass(ID) { + initializeLoongArchPreRAExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + StringRef getPassName() const override { + return LOONGARCH_PRERA_EXPAND_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandPcalau12iInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo); + bool expandLoadAddressPcrel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressGot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, + 
MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandFunctionCALL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + bool IsTailCall); +}; + +char LoongArchPreRAExpandPseudo::ID = 0; + +bool LoongArchPreRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = + static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo()); + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + +bool LoongArchPreRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool LoongArchPreRAExpandPseudo::expandMI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case LoongArch::PseudoLA_PCREL: + return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_GOT: + return expandLoadAddressGot(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LE: + return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE: + return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LD: + return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_GD: + return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); + case LoongArch::PseudoCALL: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); + case LoongArch::PseudoTAIL: + return expandFunctionCALL(MBB, MBBI, NextMBBI, 
/*IsTailCall=*/true); + } + return false; +} + +bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, + unsigned SecondOpcode, unsigned FlagsLo) { + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = + MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + MachineOperand &Symbol = MI.getOperand(1); + + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg) + .addDisp(Symbol, 0, FlagsHi); + + MachineInstr *SecondMI = + BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) + .addReg(ScratchReg) + .addDisp(Symbol, 0, FlagsLo); + + if (MI.hasOneMemOperand()) + SecondMI->addMemOperand(*MF, *MI.memoperands_begin()); + + MI.eraseFromParent(); + return true; +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %pc_hi20(sym) + // addi.w/d $rd, $rd, %pc_lo12(sym) + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget<LoongArchSubtarget>(); + unsigned SecondOpcode = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_PCREL_HI, + SecondOpcode, LoongArchII::MO_PCREL_LO); +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %got_pc_hi20(sym) + // ld.w/d $rd, $rd, %got_pc_lo12(sym) + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget<LoongArchSubtarget>(); + unsigned SecondOpcode = STI.is64Bit() ? 
LoongArch::LD_D : LoongArch::LD_W; + return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_GOT_PC_HI, + SecondOpcode, LoongArchII::MO_GOT_PC_LO); +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // lu12i.w $rd, %le_hi20(sym) + // ori $rd, $rd, %le_lo12(sym) + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = + MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + MachineOperand &Symbol = MI.getOperand(1); + + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg) + .addDisp(Symbol, 0, LoongArchII::MO_LE_HI); + + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg) + .addReg(ScratchReg) + .addDisp(Symbol, 0, LoongArchII::MO_LE_LO); + + MI.eraseFromParent(); + return true; +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %ie_pc_hi20(sym) + // ld.w/d $rd, $rd, %ie_pc_lo12(sym) + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget(); + unsigned SecondOpcode = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W; + return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_IE_PC_HI, + SecondOpcode, LoongArchII::MO_IE_PC_LO); +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %ld_pc_hi20(sym) + // addi.w/d $rd, $rd, %got_pc_lo12(sym) + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget(); + unsigned SecondOpcode = STI.is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W; + return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_LD_PC_HI, + SecondOpcode, LoongArchII::MO_GOT_PC_LO); +} + +bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Code Sequence: + // pcalau12i $rd, %gd_pc_hi20(sym) + // addi.w/d $rd, $rd, %got_pc_lo12(sym) + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget(); + unsigned SecondOpcode = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_GD_PC_HI, + SecondOpcode, LoongArchII::MO_GOT_PC_LO); +} + +bool LoongArchPreRAExpandPseudo::expandFunctionCALL( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + const MachineOperand &Func = MI.getOperand(0); + MachineInstrBuilder CALL; + unsigned Opcode; + + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + break; + case CodeModel::Small: { + // CALL: + // bl func + // TAIL: + // b func + Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); + break; + } + case CodeModel::Medium: { + // CALL: + // pcalau12i $ra, %pc_hi20(func) + // jirl $ra, $ra, %pc_lo12(func) + // TAIL: + // pcalau12i $scratch, %pc_hi20(func) + // jirl $r0, $scratch, %pc_lo12(func) + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register ScratchReg = + IsTailCall + ? 
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : LoongArch::R1; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); + if (Func.isSymbol()) { + const char *FnName = Func.getSymbolName(); + MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); + CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); + break; + } + assert(Func.isGlobal() && "Expected a GlobalValue at this time"); + const GlobalValue *GV = Func.getGlobal(); + MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); + CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); + break; + } + } + + // Transfer implicit operands. + CALL.copyImplicitOps(MI); + + // Transfer MI flags. + CALL.setMIFlags(MI.getFlags()); + + MI.eraseFromParent(); + return true; +} + +} // end namespace + +INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", + LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false) + +namespace llvm { + +FunctionPass *createLoongArchPreRAExpandPseudoPass() { + return new LoongArchPreRAExpandPseudo(); +} + +} // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 20448492a558cf1a22d57f86c3b0b1c1f86c0f3f..40e7665fb1f7ec1d50a787de0e05196f5fa35dcb 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -121,6 +121,13 @@ def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>; def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>; def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>; +// Pseudo instructions for spill/reload CFRs. 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def PseudoST_CFR : Pseudo<(outs), + (ins CFR:$ccd, GPR:$rj, grlenimm:$imm)>; +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def PseudoLD_CFR : Pseudo<(outs CFR:$ccd), + (ins GPR:$rj, grlenimm:$imm)>; } // Predicates = [HasBasicF] //===----------------------------------------------------------------------===// @@ -142,22 +149,31 @@ def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; def : PatFpr; +def : PatFpr; +def : PatFpr; + +def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>; + +def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>; /// Setcc // Match non-signaling comparison -// TODO: change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f32 in testcase. -// See RISCV float-fcmp-strict.ll for reference. class PatFPSetcc - : Pat<(setcc RegTy:$fj, RegTy:$fk, cc), - (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>; -// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. + : Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc), + (CmpInst RegTy:$fj, RegTy:$fk)>; +// SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into +// SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT. def : PatFPSetcc; +def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; +def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; @@ -167,12 +183,47 @@ def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. 
+multiclass PatFPBrcond { + def : Pat<(brcond (xor (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), -1), + bb:$imm21), + (BCEQZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>; + def : Pat<(brcond (GRLenVT (setcc RegTy:$fj, RegTy:$fk, cc)), bb:$imm21), + (BCNEZ (CmpInst RegTy:$fj, RegTy:$fk), bb:$imm21)>; +} + +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; + +// Match signaling comparison + +class PatStrictFsetccs + : Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc), + (CmpInst RegTy:$fj, RegTy:$fk)>; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; /// Select -def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj), - (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>; +def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj), + (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>; /// Selectcc @@ -194,10 +245,12 @@ def : PatFPSelectcc; /// Loads defm : LdPat; +def : RegRegLdPat; /// Stores defm : StPat; +def : RegRegStPat; /// Floating point constants @@ -207,6 +260,28 @@ def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>; // FP Conversion def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; + +// FP reciprocal operation +def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>; + +// fmadd.s: fj * fk + fa +def : Pat<(fma FPR32:$fj, FPR32:$fk, FPR32:$fa), (FMADD_S $fj, $fk, $fa)>; + +// fmsub.s: fj * fk - fa +def : Pat<(fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa)), + (FMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +// fnmadd.s: -(fj * fk + fa) +def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), + (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +// fnmadd.s: -fj * fk - fa 
(the nsz flag on the FMA) +def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), + (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +// fnmsub.s: -fj * fk + fa +def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), + (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; } // Predicates = [HasBasicF] let Predicates = [HasBasicF, IsLA64] in { @@ -216,7 +291,16 @@ def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), (MOVFR2GR_S FPR32:$src)>; // int -> f32 -def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +def : Pat<(f32 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +// uint -> f32 +def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicF, IsLA64] + +// FP Rounding +let Predicates = [HasBasicF, IsLA64] in { +def : PatFpr; } // Predicates = [HasBasicF, IsLA64] let Predicates = [HasBasicF, IsLA32] in { @@ -226,4 +310,4 @@ def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; // int -> f32 def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; -} // Predicates = [HasBasicF, IsLA64] +} // Predicates = [HasBasicF, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index bb50cec9f4c07179e798b3b7c0ec3f89ad9e1d61..50d7e9920ea99e42c74a35a7a18759bb807b1168 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -148,20 +148,33 @@ def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; +def : PatFprFpr; def : PatFpr; +def : PatFpr; +def : PatFpr; + +def : Pat<(fdiv fpimm1, (fsqrt FPR64:$fj)), (FRSQRT_D FPR64:$fj)>; + +def : Pat<(fcopysign FPR64:$fj, FPR32:$fk), + 
(FCOPYSIGN_D FPR64:$fj, (FCVT_D_S FPR32:$fk))>; +def : Pat<(fcopysign FPR32:$fj, FPR64:$fk), + (FCOPYSIGN_S FPR32:$fj, (FCVT_S_D FPR64:$fk))>; + +def : Pat<(fcanonicalize FPR64:$fj), (FMAX_D $fj, $fj)>; /// Setcc // Match non-signaling comparison -// TODO: Change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f64 in testcase. -// See RISCV float-fcmp-strict.ll for reference. - -// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. +// SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into +// SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT. def : PatFPSetcc; +def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; +def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; @@ -171,12 +184,36 @@ def : PatFPSetcc; def : PatFPSetcc; def : PatFPSetcc; -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions. +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; +defm : PatFPBrcond; + +// Match signaling comparison + +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; /// Select -def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj), - (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>; +def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj), + (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>; /// Selectcc @@ -194,10 +231,12 @@ def : PatFPSelectcc; /// Loads defm : LdPat; +def : RegRegLdPat; /// Stores defm : StPat; +def : RegRegStPat; /// FP conversion operations @@ -209,6 +248,28 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>; def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D 
FPR64:$src)>; // f32 -> f64 def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; + +// FP reciprocal operation +def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>; + +// fmadd.d: fj * fk + fa +def : Pat<(fma FPR64:$fj, FPR64:$fk, FPR64:$fa), (FMADD_D $fj, $fk, $fa)>; + +// fmsub.d: fj * fk - fa +def : Pat<(fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa)), + (FMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + +// fnmadd.d: -(fj * fk + fa) +def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, FPR64:$fa)), + (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + +// fnmadd.d: -fj * fk - fa (the nsz flag on the FMA) +def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), + (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + +// fnmsub.d: -(fj * fk - fa) +def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), + (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; } // Predicates = [HasBasicD] /// Floating point constants @@ -217,26 +278,33 @@ let Predicates = [HasBasicD, IsLA64] in { def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>; def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>; def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>; +} // Predicates = [HasBasicD, IsLA64] +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; +def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; +def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; +} // Predicates = [HasBasicD, IsLA32] + +/// Convert int to FP -// Convert int to FP +let Predicates = [HasBasicD, IsLA64] in { +def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_L (MOVGR2FR_D GPR:$src))>; def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; -def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), - (FFINT_D_W (MOVGR2FR_W GPR:$src))>; - def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; +} // Predicates = 
[HasBasicD, IsLA64] +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicD, IsLA32] // Convert FP to int +let Predicates = [HasBasicD, IsLA64] in { def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; } // Predicates = [HasBasicD, IsLA64] -let Predicates = [HasBasicD, IsLA32] in { -def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; -def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; -def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; - -// Convert int to FP -def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; -} // Predicates = [HasBasicD, IsLA32] +// FP Rounding +let Predicates = [HasBasicD, IsLA64] in { +def : PatFpr; +} // Predicates = [HasBasicD, IsLA64] diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 0d9ec9e2eaaac597cca89dde52e0c9ea809c7688..7c51e213f2d745f5948f64c1cf44cb3c3a9a6f1a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -14,6 +14,7 @@ #include "LoongArchMachineFunctionInfo.h" #include "LoongArchSubtarget.h" #include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -53,21 +54,55 @@ void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB, MachineInstr::MIFlag Flag) const { const LoongArchInstrInfo *TII = STI.getInstrInfo(); bool IsLA64 = STI.is64Bit(); + unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; if (DestReg == SrcReg && Val == 0) return; if (isInt<12>(Val)) { // addi.w/d $DstReg, $SrcReg, Val - BuildMI(MBB, MBBI, DL, - TII->get(IsLA64 ? 
LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg) + BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg) .addReg(SrcReg) .addImm(Val) .setMIFlag(Flag); return; } - report_fatal_error("adjustReg cannot yet handle adjustments >12 bits"); + // Try to split the offset across two ADDIs. We need to keep the stack pointer + // aligned after each ADDI. We need to determine the maximum value we can put + // in each ADDI. In the negative direction, we can use -2048 which is always + // sufficiently aligned. In the positive direction, we need to find the + // largest 12-bit immediate that is aligned. Exclude -4096 since it can be + // created with LU12I.W. + assert(getStackAlign().value() < 2048 && "Stack alignment too large"); + int64_t MaxPosAdjStep = 2048 - getStackAlign().value(); + if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) { + int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep; + Val -= FirstAdj; + BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg) + .addReg(SrcReg) + .addImm(FirstAdj) + .setMIFlag(Flag); + BuildMI(MBB, MBBI, DL, TII->get(Addi), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(Val) + .setMIFlag(Flag); + return; + } + + unsigned Opc = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W; + if (Val < 0) { + Val = -Val; + Opc = IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W; + } + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); } // Determine the size of the frame and maximum call frame size. 
@@ -84,12 +119,71 @@ void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { MFI.setStackSize(FrameSize); } +static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII, + const MachineFunction &MF) { + uint64_t FuncSize = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + FuncSize += TII->getInstSizeInBytes(MI); + return FuncSize; +} + +static bool needScavSlotForCFR(MachineFunction &MF) { + if (!MF.getSubtarget().hasBasicF()) + return false; + for (auto &MBB : MF) + for (auto &MI : MBB) + if (MI.getOpcode() == LoongArch::PseudoST_CFR) + return true; + return false; +} + +void LoongArchFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + const TargetRegisterClass &RC = LoongArch::GPRRegClass; + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + LoongArchMachineFunctionInfo *LAFI = + MF.getInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + unsigned ScavSlotsNum = 0; + + // Far branches beyond 27-bit offset require a spill slot for scratch register. + bool IsLargeFunction = !isInt<27>(estimateFunctionSizeInBytes(TII, MF)); + if (IsLargeFunction) + ScavSlotsNum = 1; + + // estimateStackSize has been observed to under-estimate the final stack + // size, so give ourselves wiggle-room by checking for stack size + // representable an 11-bit signed field rather than 12-bits. + if (!isInt<11>(MFI.estimateStackSize(MF))) + ScavSlotsNum = std::max(ScavSlotsNum, 1u); + + // For CFR spill. + if (needScavSlotForCFR(MF)) + ++ScavSlotsNum; + + // Create emergency spill slots. 
+ for (unsigned i = 0; i < ScavSlotsNum; ++i) { + int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), + false); + RS->addScavengingFrameIndex(FI); + if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1) + LAFI->setBranchRelaxationSpillFrameIndex(FI); + LLVM_DEBUG(dbgs() << "Allocated FI(" << FI + << ") as the emergency spill slot.\n"); + } +} + void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *LoongArchFI = MF.getInfo(); const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); const LoongArchInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + bool IsLA64 = STI.is64Bit(); Register SPReg = LoongArch::R3; Register FPReg = LoongArch::R22; @@ -97,25 +191,37 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; - + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; // Determine the correct frame layout determineFrameLayout(MF); // First, compute final stack size. uint64_t StackSize = MFI.getStackSize(); + uint64_t RealStackSize = StackSize; // Early exit if there is no need to allocate space in the stack. if (StackSize == 0 && !MFI.adjustsStack()) return; + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true); + uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount; + // Split the SP adjustment to reduce the offsets of callee saved spill. + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Adjust stack. adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); - // Emit ".cfi_def_cfa_offset StackSize". 
- unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); + if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) { + // Emit ".cfi_def_cfa_offset StackSize". + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } const auto &CSI = MFI.getCalleeSavedInfo(); @@ -138,23 +244,91 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, // Generate new FP. if (hasFP(MF)) { - adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup); - - // Emit ".cfi_def_cfa $fp, 0" - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, RI->getDwarfRegNum(FPReg, true), 0)); + adjustReg(MBB, MBBI, DL, FPReg, SPReg, + StackSize - LoongArchFI->getVarArgsSaveSize(), + MachineInstr::FrameSetup); + + // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfa(nullptr, RI->getDwarfRegNum(FPReg, true), + LoongArchFI->getVarArgsSaveSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); } + + // Emit the second SP adjustment after saving callee saved registers. + if (FirstSPAdjustAmount && SecondSPAdjustAmount) { + if (hasFP(MF)) { + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, + MachineInstr::FrameSetup); + } else { + // FIXME: RegScavenger will place the spill instruction before the + // prologue if a VReg is created in the prologue. This will pollute the + // caller's stack data. 
Therefore, until there is better way, we just use + // the `addi.w/d` instruction for stack adjustment to ensure that VReg + // will not be created. + for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048) + BuildMI(MBB, MBBI, DL, + TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), SPReg) + .addReg(SPReg) + .addImm(Val < 2048 ? -Val : -2048) + .setMIFlag(MachineInstr::FrameSetup); + + // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", + // don't emit an sp-based .cfi_def_cfa_offset + // Emit ".cfi_def_cfa_offset RealStackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + } + + if (hasFP(MF)) { + // Realign stack. + if (RI->hasStackRealignment(MF)) { + unsigned ShiftAmount = Log2(MFI.getMaxAlign()); + Register VR = + MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(MBB, MBBI, DL, + TII->get(IsLA64 ? LoongArch::SRLI_D : LoongArch::SRLI_W), VR) + .addReg(SPReg) + .addImm(ShiftAmount) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, + TII->get(IsLA64 ? LoongArch::SLLI_D : LoongArch::SLLI_W), SPReg) + .addReg(VR) + .addImm(ShiftAmount) + .setMIFlag(MachineInstr::FrameSetup); + // FP will be used to restore the frame in the epilogue, so we need + // another base register BP to record SP after re-alignment. SP will + // track the current stack after allocating variable sized objects. 
+ if (hasBP(MF)) { + // move BP, $sp + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::OR), + LoongArchABI::getBPReg()) + .addReg(SPReg) + .addReg(LoongArch::R0) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } } void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *LoongArchFI = MF.getInfo(); Register SPReg = LoongArch::R3; - + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction().getCallingConv() == CallingConv::GHC) + return; MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -170,14 +344,59 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, // Restore the stack pointer. if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { assert(hasFP(MF) && "frame pointer should not have been eliminated"); - adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize, + adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, + -StackSize + LoongArchFI->getVarArgsSaveSize(), MachineInstr::FrameDestroy); } + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = StackSize - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + + adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount, + MachineInstr::FrameDestroy); + StackSize = FirstSPAdjustAmount; + } + // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); } +// We would like to split the SP adjustment to reduce prologue/epilogue +// as following instructions. In this way, the offset of the callee saved +// register could fit in a single store. +// e.g. 
+// addi.d $sp, $sp, -2032 +// st.d $ra, $sp, 2024 +// st.d $fp, $sp, 2016 +// addi.d $sp, $sp, -16 +uint64_t +LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF, + bool IsPrologue) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed + // 12-bit and there exists a callee-saved register needing to be pushed. + if (!isInt<12>(MFI.getStackSize())) { + // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will + // cause sp = sp + 2048 in the epilogue to be split into multiple + // instructions. Offsets smaller than 2048 can fit in a single load/store + // instruction, and we have to stick with the stack alignment. + // So (2048 - StackAlign) will satisfy the stack alignment. + // + // FIXME: This place may seem odd. When using multiple ADDI instructions to + // adjust the stack in Prologue, and there are no callee-saved registers, we + // can take advantage of the logic of split sp ajustment to reduce code + // changes. + return CSI.size() > 0 ? 2048 - getStackAlign().value() + : (IsPrologue ? 2048 : 0); + } + return 0; +} + void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -193,10 +412,74 @@ void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(LoongArchABI::getBPReg()); } +// Do not preserve stack space within prologue for outgoing variables if the +// function contains variable size objects. +// Let eliminateCallFramePseudoInstr preserve stack space for it. +bool LoongArchFrameLowering::hasReservedCallFrame( + const MachineFunction &MF) const { + return !MF.getFrameInfo().hasVarSizedObjects(); +} + +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions. 
+MachineBasicBlock::iterator +LoongArchFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + Register SPReg = LoongArch::R3; + DebugLoc DL = MI->getDebugLoc(); + + if (!hasReservedCallFrame(MF)) { + // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and + // ADJCALLSTACKUP must be converted to instructions manipulating the stack + // pointer. This is necessary when there is a variable length stack + // allocation (e.g. alloca), which means it's not possible to allocate + // space for outgoing arguments from within the function prologue. + int64_t Amount = MI->getOperand(0).getImm(); + + if (Amount != 0) { + // Ensure the stack remains aligned after adjustment. + Amount = alignSPAdjust(Amount); + + if (MI->getOpcode() == LoongArch::ADJCALLSTACKDOWN) + Amount = -Amount; + + adjustReg(MBB, MI, DL, SPReg, SPReg, Amount, MachineInstr::NoFlags); + } + } + + return MBB.erase(MI); +} + +bool LoongArchFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return true; + + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); + + // Insert the spill to the stack frame. + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + // If the register is RA and the return address is taken by method + // LoongArchTargetLowering::lowerRETURNADDR, don't set kill flag. 
+ bool IsKill = + !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken()); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI); + } + + return true; +} + StackOffset LoongArchFrameLowering::getFrameIndexReference( const MachineFunction &MF, int FI, Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + auto *LoongArchFI = MF.getInfo(); + uint64_t StackSize = MFI.getStackSize(); + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); // Callee-saved registers should be referenced relative to the stack // pointer (positive offset), otherwise use the frame pointer (negative @@ -213,10 +496,24 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference( MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } - FrameReg = RI->getFrameRegister(MF); - if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { + if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = LoongArch::R3; - Offset += StackOffset::getFixed(MFI.getStackSize()); + if (FirstSPAdjustAmount) + Offset += StackOffset::getFixed(FirstSPAdjustAmount); + else + Offset += StackOffset::getFixed(StackSize); + } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, so we need another base register to record the stack + // after realignment. + FrameReg = hasBP(MF) ? 
LoongArchABI::getBPReg() : LoongArch::R3; + Offset += StackOffset::getFixed(StackSize); + } else { + FrameReg = RI->getFrameRegister(MF); + if (hasFP(MF)) + Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); + else + Offset += StackOffset::getFixed(StackSize); } return Offset; diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h index 72d8e006a0bbfb6f68a0ac3c2473cece63147f81..414d671593d0b4e674b05c527934359845dd3e4e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -34,11 +34,17 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + + bool hasReservedCallFrame(const MachineFunction &MF) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const override { - return MBB.erase(MI); - } + MachineBasicBlock::iterator MI) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + ArrayRef CSI, + const TargetRegisterInfo *TRI) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; @@ -46,6 +52,9 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; + uint64_t getFirstSPAdjustAmount(const MachineFunction &MF, + bool IsPrologue = false) const; + private: void determineFrameLayout(MachineFunction &MF) const; void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index bb40ff8175748110c3cf8ded99baf5f995bb3054..49684b911cc63a1e8effd54c6d90cd0e78e1de29 100644 --- 
a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -77,6 +77,51 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +bool LoongArchDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + SDValue Base = Op; + SDValue Offset = + CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getGRLenVT()); + switch (ConstraintID) { + default: + llvm_unreachable("unexpected asm memory constraint"); + // Reg+Reg addressing. + case InlineAsm::Constraint_k: + Base = Op.getOperand(0); + Offset = Op.getOperand(1); + break; + // Reg+simm12 addressing. + case InlineAsm::Constraint_m: + if (CurDAG->isBaseWithConstantOffset(Op)) { + ConstantSDNode *CN = dyn_cast(Op.getOperand(1)); + if (isIntN(12, CN->getSExtValue())) { + Base = Op.getOperand(0); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Op), + Op.getValueType()); + } + } + break; + // Reg+0 addressing. + case InlineAsm::Constraint_ZB: + break; + // Reg+(simm14<<2) addressing. + case InlineAsm::Constraint_ZC: + if (CurDAG->isBaseWithConstantOffset(Op)) { + ConstantSDNode *CN = dyn_cast(Op.getOperand(1)); + if (isIntN(16, CN->getSExtValue()) && + isAligned(Align(4ULL), CN->getZExtValue())) { + Base = Op.getOperand(0); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Op), + Op.getValueType()); + } + } + break; + } + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; +} + bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { // If this is FrameIndex, select it directly. Otherwise just let it get // selected to a register independently. @@ -88,6 +133,14 @@ bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { return true; } +bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) { + // If this is FrameIndex, don't select it. 
+ if (isa(Addr)) + return false; + Base = Addr; + return true; +} + bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { // Shift instructions on LoongArch only read the lower 5 or 6 bits of the @@ -152,6 +205,12 @@ bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { Val = N.getOperand(0); return true; } + if (N.getOpcode() == LoongArchISD::BSTRPICK && + N.getConstantOperandVal(1) < UINT64_C(0X1F) && + N.getConstantOperandVal(2) == UINT64_C(0)) { + Val = N; + return true; + } MVT VT = N.getSimpleValueType(); if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { Val = N; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 8c9357d75979c9611f4338890e4401852345ae83..49843ac610da2621be3d0c214144032b11775ec2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -38,7 +38,11 @@ public: void Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + bool SelectBaseAddr(SDValue Addr, SDValue &Base); + bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 93c8864347bbeb2f4ef920dd978d69ae00de9275..6a6f987eed8b22fb57ad766ea53e50290798aeb3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -17,16 +17,23 @@ #include "LoongArchRegisterInfo.h" #include "LoongArchSubtarget.h" #include "LoongArchTargetMachine.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" 
+#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; #define DEBUG_TYPE "loongarch-isel-lowering" +STATISTIC(NumTailCalls, "Number of tail calls"); + static cl::opt ZeroDivCheck( "loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), @@ -52,8 +59,29 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + setOperationAction(ISD::ROTL, GRLenVT, Expand); + setOperationAction(ISD::CTPOP, GRLenVT, Expand); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, + ISD::JumpTable}, + GRLenVT, Custom); + + setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); + setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); if (Subtarget.is64Bit()) { setOperationAction(ISD::SHL, MVT::i32, Custom); @@ -61,24 +89,85 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, 
setOperationAction(ISD::SRL, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::ROTR, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTLZ, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + if (Subtarget.hasBasicF()) + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (Subtarget.hasBasicD()) + setOperationAction(ISD::FRINT, MVT::f64, Legal); } - static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE, - ISD::SETUGT, ISD::SETUGE}; + // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and + // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 + // and i32 could still be byte-swapped relatively cheaply. + setOperationAction(ISD::BSWAP, MVT::i16, Custom); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::BSWAP, MVT::i32, Custom); + } + + // Expand bitreverse.i16 with native-width bitrev and shift for now, before + // we get to know which of sll and revb.2h is faster. 
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + } else { + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); + } + + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; if (Subtarget.hasBasicF()) { setCondCodeAction(FPCCToExpand, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); } if (Subtarget.hasBasicD()) { setCondCodeAction(FPCCToExpand, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + 
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); } + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, GRLenVT, Expand); setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -87,7 +176,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MUL_I128, nullptr); setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); + if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD())) { + setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + } // Compute derived properties from the register classes. computeRegisterProperties(STI.getRegisterInfo()); @@ -98,6 +192,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + setMinCmpXchgSizeInBits(32); + // Function alignments. const Align FunctionAlignment(4); setMinFunctionAlignment(FunctionAlignment); @@ -107,54 +203,211 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SRL); } +bool LoongArchTargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // In order to maximise the opportunity for common subexpression elimination, + // keep a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. 
+ return false; +} + SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: - report_fatal_error("unimplemented operand"); + case ISD::EH_DWARF_CFA: + return lowerEH_DWARF_CFA(Op, DAG); case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: + return lowerINTRINSIC_VOID(Op, DAG); + case ISD::BlockAddress: + return lowerBlockAddress(Op, DAG); + case ISD::JumpTable: + return lowerJumpTable(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - // This can be called for an i32 shift amount that needs to be promoted. 
- assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - return SDValue(); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); case ISD::FP_TO_SINT: return lowerFP_TO_SINT(Op, DAG); case ISD::BITCAST: return lowerBITCAST(Op, DAG); - case ISD::FP_TO_UINT: - return SDValue(); case ISD::UINT_TO_FP: return lowerUINT_TO_FP(Op, DAG); + case ISD::SINT_TO_FP: + return lowerSINT_TO_FP(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG); + case ISD::WRITE_REGISTER: + return lowerWRITE_REGISTER(Op, DAG); + } + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, + SelectionDAG &DAG) const { + + if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) { + DAG.getContext()->emitError( + "On LA64, only 64-bit registers can be written."); + return Op.getOperand(0); + } + + if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) { + DAG.getContext()->emitError( + "On LA32, only 32-bit registers can be written."); + return Op.getOperand(0); + } + + return Op; +} + +SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + if (!isa(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_frame_address' must " + "be a constant integer"); + return SDValue(); + } + + MachineFunction &MF = DAG.getMachineFunction(); + MF.getFrameInfo().setFrameAddressIsTaken(true); + Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + int GRLenInBytes = Subtarget.getGRLen() / 8; + + while (Depth--) { + int Offset = -(GRLenInBytes * 2); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, + 
DAG.getIntPtrConstant(Offset, DL)); + FrameAddr = + DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); } + return FrameAddr; +} + +SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + // Currently only support lowering return address for current frame. + if (cast(Op.getOperand(0))->getZExtValue() != 0) { + DAG.getContext()->emitError( + "return address can only be determined for the current frame"); + return SDValue(); + } + + MachineFunction &MF = DAG.getMachineFunction(); + MF.getFrameInfo().setReturnAddressIsTaken(true); + MVT GRLenVT = Subtarget.getGRLenVT(); + + // Return the value of the return address register, marking it an implicit + // live-in. + Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(), + getRegClassFor(GRLenVT)); + return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT); +} + +SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + auto Size = Subtarget.getGRLen() / 8; + auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false); + return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); +} + +SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + auto *FuncInfo = MF.getInfo(); + + SDLoc DL(Op); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy(MF.getDataLayout())); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), + MachinePointerInfo(SV)); } SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); SDLoc DL(Op); - auto &TLI = DAG.getTargetLoweringInfo(); - SDValue Tmp1, Tmp2; - SDValue Op1 = Op.getOperand(0); - if (Op1->getOpcode() == ISD::AssertZext || - Op1->getOpcode() == ISD::AssertSext) + SDValue Op0 = Op.getOperand(0); + if (Op0->getOpcode() == ISD::AND) { + auto *C = dyn_cast(Op0.getOperand(1)); + if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) + return Op; + } + + if (Op0->getOpcode() == LoongArchISD::BSTRPICK && + Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && + Op0.getConstantOperandVal(2) == UINT64_C(0)) + return Op; + + if (Op0.getOpcode() == ISD::AssertZext && + dyn_cast(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) return Op; - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); - SDNode *N = Res.getNode(); - TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); - return Tmp1; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); + return Result; +} + +SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + + if ((Op0.getOpcode() == ISD::AssertSext || + Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && + 
dyn_cast(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) + return Op; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); + return Result; } SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, @@ -188,43 +441,376 @@ SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); } +static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); +} + +static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), + Flags); +} + +static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), + N->getOffset(), Flags); +} + +static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); +} + +template +SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, + bool IsLocal) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + // TODO: Check CodeModel. + if (IsLocal) + // This generates the pattern (PseudoLA_PCREL sym), which expands to + // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). 
+ return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), + 0); + + // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d + // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). + return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0); +} + +SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return getAddr(cast(Op), DAG); +} + +SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + return getAddr(cast(Op), DAG); +} + SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); - ConstantPoolSDNode *N = cast(Op); - - // FIXME: Only support PC-relative addressing to access the symbol. - // Target flags will be added later. - if (!isPositionIndependent()) { - SDValue ConstantN = DAG.getTargetConstantPool( - N->getConstVal(), Ty, N->getAlign(), N->getOffset()); - SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN), - 0); - SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D - : LoongArch::ADDI_W, - DL, Ty, AddrHi, ConstantN), - 0); - return Addr; - } - report_fatal_error("Unable to lower ConstantPool"); + return getAddr(cast(Op), DAG); } SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); + GlobalAddressSDNode *N = cast(Op); + assert(N->getOffset() == 0 && "unexpected offset in global node"); + return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); +} + +SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG, + unsigned Opc) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + MVT GRLenVT = Subtarget.getGRLenVT(); + + SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); + SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + + // Add the thread pointer. 
+ return DAG.getNode(ISD::ADD, DL, Ty, Offset, + DAG.getRegister(LoongArch::R2, GRLenVT)); +} + +SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG, + unsigned Opc) const { + SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); - const GlobalValue *GV = cast(Op)->getGlobal(); - unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + + // Use a PC-relative addressing mode to access the dynamic GOT address. + SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); + SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + + // Prepare argument list to generate call. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Load; + Entry.Ty = CallTy; + Args.push_back(Entry); + + // Setup call to __tls_get_addr. + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, CallTy, + DAG.getExternalSymbol("__tls_get_addr", Ty), + std::move(Args)); + + return LowerCallTo(CLI).first; +} + +SDValue +LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + if (DAG.getMachineFunction().getFunction().getCallingConv() == + CallingConv::GHC) + report_fatal_error("In GHC calling convention TLS is not supported"); + + GlobalAddressSDNode *N = cast(Op); + assert(N->getOffset() == 0 && "unexpected offset in global node"); + // OHOS_LOCAL begin + if (DAG.getTarget().useEmulatedTLS()) + return LowerToTLSEmulatedModel(N, DAG); + // OHOS_LOCAL end + SDValue Addr; + switch (getTargetMachine().getTLSModel(N->getGlobal())) { + case TLSModel::GeneralDynamic: + // In this model, application code calls the dynamic linker function + // __tls_get_addr to locate TLS offsets into the dynamic thread vector at + // runtime. 
+ Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD); + break; + case TLSModel::LocalDynamic: + // Same as GeneralDynamic, except for assembly modifiers and relocation + // records. + Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD); + break; + case TLSModel::InitialExec: + // This model uses the GOT to resolve TLS offsets. + Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE); + break; + case TLSModel::LocalExec: + // This model is used when static linking as the TLS offsets are resolved + // during program linking. + Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE); + break; + } + + return Addr; +} + +SDValue +LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getConstantOperandVal(0)) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::thread_pointer: { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + return DAG.getRegister(LoongArch::R2, PtrVT); + } + } +} + +// Helper function that emits error message for intrinsics with chain and return +// merge values of a UNDEF and the chain. 
+static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, + StringRef ErrorMsg, + SelectionDAG &DAG) { + DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); + return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, + SDLoc(Op)); +} + +SDValue +LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT GRLenVT = Subtarget.getGRLenVT(); + EVT VT = Op.getValueType(); + SDValue Chain = Op.getOperand(0); + const StringRef ErrorMsgOOR = "argument out of range"; + const StringRef ErrorMsgReqLA64 = "requires loongarch64"; + const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; + + switch (Op.getConstantOperandVal(1)) { + default: + return Op; + case Intrinsic::loongarch_crc_w_b_w: + case Intrinsic::loongarch_crc_w_h_w: + case Intrinsic::loongarch_crc_w_w_w: + case Intrinsic::loongarch_crc_w_d_w: + case Intrinsic::loongarch_crcc_w_b_w: + case Intrinsic::loongarch_crcc_w_h_w: + case Intrinsic::loongarch_crcc_w_w_w: + case Intrinsic::loongarch_crcc_w_d_w: + return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); + case Intrinsic::loongarch_csrrd_w: + case Intrinsic::loongarch_csrrd_d: { + unsigned Imm = cast(Op.getOperand(2))->getZExtValue(); + return !isUInt<14>(Imm) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + } + case Intrinsic::loongarch_csrwr_w: + case Intrinsic::loongarch_csrwr_d: { + unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); + return !isUInt<14>(Imm) + ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, + {Chain, Op.getOperand(2), + DAG.getConstant(Imm, DL, GRLenVT)}); + } + case Intrinsic::loongarch_csrxchg_w: + case Intrinsic::loongarch_csrxchg_d: { + unsigned Imm = cast(Op.getOperand(4))->getZExtValue(); + return !isUInt<14>(Imm) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, + {Chain, Op.getOperand(2), Op.getOperand(3), + DAG.getConstant(Imm, DL, GRLenVT)}); + } + case Intrinsic::loongarch_iocsrrd_d: { + return DAG.getNode( + LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))}); + } +#define IOCSRRD_CASE(NAME, NODE) \ + case Intrinsic::loongarch_##NAME: { \ + return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ + {Chain, Op.getOperand(2)}); \ + } + IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); + IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); + IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); +#undef IOCSRRD_CASE + case Intrinsic::loongarch_cpucfg: { + return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, + {Chain, Op.getOperand(2)}); + } + case Intrinsic::loongarch_lddir_d: { + unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); + return !isUInt<8>(Imm) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : Op; + } + case Intrinsic::loongarch_movfcsr2gr: { + if (!Subtarget.hasBasicF()) + return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); + unsigned Imm = cast(Op.getOperand(2))->getZExtValue(); + return !isUInt<2>(Imm) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + } + } +} + +// Helper function that emits error message for intrinsics with void return +// value and return the chain. 
+static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, + SelectionDAG &DAG) { - // TODO: Support dso_preemptable and target flags. - if (GV->isDSOLocal()) { - SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); - SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); - SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); - return Addr; + DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); + return Op.getOperand(0); +} + +SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT GRLenVT = Subtarget.getGRLenVT(); + SDValue Chain = Op.getOperand(0); + uint64_t IntrinsicEnum = Op.getConstantOperandVal(1); + SDValue Op2 = Op.getOperand(2); + const StringRef ErrorMsgOOR = "argument out of range"; + const StringRef ErrorMsgReqLA64 = "requires loongarch64"; + const StringRef ErrorMsgReqLA32 = "requires loongarch32"; + const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; + + switch (IntrinsicEnum) { + default: + // TODO: Add more Intrinsics. + return SDValue(); + case Intrinsic::loongarch_cacop_d: + case Intrinsic::loongarch_cacop_w: { + if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) + return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG); + if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) + return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); + // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) + unsigned Imm1 = cast(Op2)->getZExtValue(); + int Imm2 = cast(Op.getOperand(4))->getSExtValue(); + if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) + return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); + return Op; + } + case Intrinsic::loongarch_dbar: { + unsigned Imm = cast(Op2)->getZExtValue(); + return !isUInt<15>(Imm) + ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, + DAG.getConstant(Imm, DL, GRLenVT)); + } + case Intrinsic::loongarch_ibar: { + unsigned Imm = cast(Op2)->getZExtValue(); + return !isUInt<15>(Imm) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, + DAG.getConstant(Imm, DL, GRLenVT)); + } + case Intrinsic::loongarch_break: { + unsigned Imm = cast(Op2)->getZExtValue(); + return !isUInt<15>(Imm) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, + DAG.getConstant(Imm, DL, GRLenVT)); + } + case Intrinsic::loongarch_movgr2fcsr: { + if (!Subtarget.hasBasicF()) + return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); + unsigned Imm = cast(Op2)->getZExtValue(); + return !isUInt<2>(Imm) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, + DAG.getConstant(Imm, DL, GRLenVT), + DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, + Op.getOperand(3))); + } + case Intrinsic::loongarch_syscall: { + unsigned Imm = cast(Op2)->getZExtValue(); + return !isUInt<15>(Imm) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, + DAG.getConstant(Imm, DL, GRLenVT)); + } +#define IOCSRWR_CASE(NAME, NODE) \ + case Intrinsic::loongarch_##NAME: { \ + SDValue Op3 = Op.getOperand(3); \ + return Subtarget.is64Bit() \ + ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ + : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ + Op3); \ + } + IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); + IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); + IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); +#undef IOCSRWR_CASE + case Intrinsic::loongarch_iocsrwr_d: { + return !Subtarget.is64Bit() + ? 
emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) + : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, + Op2, + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, + Op.getOperand(3))); + } +#define ASRT_LE_GT_CASE(NAME) \ + case Intrinsic::loongarch_##NAME: { \ + return !Subtarget.is64Bit() \ + ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ + : Op; \ + } + ASRT_LE_GT_CASE(asrtle_d) + ASRT_LE_GT_CASE(asrtgt_d) +#undef ASRT_LE_GT_CASE + case Intrinsic::loongarch_ldpte_d: { + unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); + return !Subtarget.is64Bit() + ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) + : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : Op; + } } - report_fatal_error("Unable to lowerGlobalAddress"); } SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, @@ -331,6 +917,14 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { return LoongArchISD::SRA_W; case ISD::SRL: return LoongArchISD::SRL_W; + case ISD::ROTR: + return LoongArchISD::ROTR_W; + case ISD::ROTL: + return LoongArchISD::ROTL_W; + case ISD::CTTZ: + return LoongArchISD::CTZ_W; + case ISD::CTLZ: + return LoongArchISD::CLZ_W; } } @@ -339,44 +933,95 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { // otherwise be promoted to i64, making it difficult to select the // SLL_W/.../*W later one because the fact the operation was originally of // type i8/i16/i32 is lost. 
-static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc = ISD::ANY_EXTEND) { SDLoc DL(N); LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); - SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); - SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); - SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); - // ReplaceNodeResults requires we maintain the same type for the return value. + SDValue NewOp0, NewRes; + + switch (NumOp) { + default: + llvm_unreachable("Unexpected NumOp"); + case 1: { + NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); + NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0); + break; + } + case 2: { + NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); + NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); + break; + } + // TODO:Handle more NumOp. + } + + // ReplaceNodeResults requires we maintain the same type for the return + // value. return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } +// Helper function that emits error message for intrinsics with chain and return +// a UNDEF and the chain as the results. 
+static void emitErrorAndReplaceIntrinsicWithChainResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, + StringRef ErrorMsg) { + DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); + Results.push_back(DAG.getUNDEF(N->getValueType(0))); + Results.push_back(N->getOperand(0)); +} + void LoongArchTargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc DL(N); + EVT VT = N->getValueType(0); switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to legalize this operation"); case ISD::SHL: case ISD::SRA: case ISD::SRL: - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + case ISD::ROTR: + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); if (N->getOperand(1).getOpcode() != ISD::Constant) { - Results.push_back(customLegalizeToWOp(N, DAG)); + Results.push_back(customLegalizeToWOp(N, DAG, 2)); + break; + } + break; + case ISD::ROTL: + ConstantSDNode *CN; + if ((CN = dyn_cast(N->getOperand(1)))) { + Results.push_back(customLegalizeToWOp(N, DAG, 2)); break; } break; case ISD::FP_TO_SINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); SDValue Src = N->getOperand(0); - EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); - SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); - Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); + EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); + if (getTypeAction(*DAG.getContext(), Src.getValueType()) != + TargetLowering::TypeSoftenFloat) { + SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); + Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); + return; + } + // If the FP type needs to be softened, emit a library call using the 'si' + // version. If we left it to default legalization we'd end up with 'di'. 
+ RTLIB::Libcall LC; + LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); + MakeLibCallOptions CallOptions; + EVT OpVT = Src.getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, VT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); + Results.push_back(Result); break; } case ISD::BITCAST: { - EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && @@ -388,7 +1033,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } case ISD::FP_TO_UINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); auto &TLI = DAG.getTargetLoweringInfo(); SDValue Tmp1, Tmp2; @@ -396,6 +1041,222 @@ void LoongArchTargetLowering::ReplaceNodeResults( Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); break; } + case ISD::BSWAP: { + SDValue Src = N->getOperand(0); + assert((VT == MVT::i16 || VT == MVT::i32) && + "Unexpected custom legalization"); + MVT GRLenVT = Subtarget.getGRLenVT(); + SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); + SDValue Tmp; + switch (VT.getSizeInBits()) { + default: + llvm_unreachable("Unexpected operand width"); + case 16: + Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc); + break; + case 32: + // Only LA64 will get to here due to the size mismatch between VT and + // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. 
+ Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc); + break; + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); + break; + } + case ISD::BITREVERSE: { + SDValue Src = N->getOperand(0); + assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && + "Unexpected custom legalization"); + MVT GRLenVT = Subtarget.getGRLenVT(); + SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); + SDValue Tmp; + switch (VT.getSizeInBits()) { + default: + llvm_unreachable("Unexpected operand width"); + case 8: + Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc); + break; + case 32: + Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc); + break; + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); + break; + } + case ISD::CTLZ: + case ISD::CTTZ: { + assert(VT == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + Results.push_back(customLegalizeToWOp(N, DAG, 1)); + break; + } + case ISD::INTRINSIC_W_CHAIN: { + SDValue Chain = N->getOperand(0); + SDValue Op2 = N->getOperand(2); + MVT GRLenVT = Subtarget.getGRLenVT(); + const StringRef ErrorMsgOOR = "argument out of range"; + const StringRef ErrorMsgReqLA64 = "requires loongarch64"; + const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; + + switch (N->getConstantOperandVal(1)) { + default: + llvm_unreachable("Unexpected Intrinsic."); + case Intrinsic::loongarch_movfcsr2gr: { + if (!Subtarget.hasBasicF()) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgReqF); + return; + } + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<2>(Imm)) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgOOR); + return; + } + SDValue MOVFCSR2GRResults = DAG.getNode( + LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0))); + 
Results.push_back(MOVFCSR2GRResults.getValue(1)); + break; + } +#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ + case Intrinsic::loongarch_##NAME: { \ + SDValue NODE = DAG.getNode( \ + LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ + Results.push_back(NODE.getValue(1)); \ + break; \ + } + CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) + CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) + CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) + CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) + CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) + CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) +#undef CRC_CASE_EXT_BINARYOP + +#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ + case Intrinsic::loongarch_##NAME: { \ + SDValue NODE = DAG.getNode( \ + LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ + {Chain, Op2, \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ + Results.push_back(NODE.getValue(1)); \ + break; \ + } + CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) + CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) +#undef CRC_CASE_EXT_UNARYOP +#define CSR_CASE(ID) \ + case Intrinsic::loongarch_##ID: { \ + if (!Subtarget.is64Bit()) \ + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ + ErrorMsgReqLA64); \ + break; \ + } + CSR_CASE(csrrd_d); + CSR_CASE(csrwr_d); + CSR_CASE(csrxchg_d); + CSR_CASE(iocsrrd_d); +#undef CSR_CASE + case Intrinsic::loongarch_csrrd_w: { + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<14>(Imm)) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgOOR); + return; + } + SDValue CSRRDResults = + DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + Results.push_back( + 
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0))); + Results.push_back(CSRRDResults.getValue(1)); + break; + } + case Intrinsic::loongarch_csrwr_w: { + unsigned Imm = cast(N->getOperand(3))->getZExtValue(); + if (!isUInt<14>(Imm)) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgOOR); + return; + } + SDValue CSRWRResults = + DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), + DAG.getConstant(Imm, DL, GRLenVT)}); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0))); + Results.push_back(CSRWRResults.getValue(1)); + break; + } + case Intrinsic::loongarch_csrxchg_w: { + unsigned Imm = cast(N->getOperand(4))->getZExtValue(); + if (!isUInt<14>(Imm)) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgOOR); + return; + } + SDValue CSRXCHGResults = DAG.getNode( + LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), + DAG.getConstant(Imm, DL, GRLenVT)}); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0))); + Results.push_back(CSRXCHGResults.getValue(1)); + break; + } +#define IOCSRRD_CASE(NAME, NODE) \ + case Intrinsic::loongarch_##NAME: { \ + SDValue IOCSRRDResults = \ + DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ + Results.push_back( \ + DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ + Results.push_back(IOCSRRDResults.getValue(1)); \ + break; \ + } + IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); + IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); + IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); +#undef IOCSRRD_CASE + case Intrinsic::loongarch_cpucfg: { + SDValue CPUCFGResults = + DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, + {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 
Op2)}); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0))); + Results.push_back(CPUCFGResults.getValue(1)); + break; + } + case Intrinsic::loongarch_lddir_d: { + if (!Subtarget.is64Bit()) { + emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, + ErrorMsgReqLA64); + return; + } + break; + } + } + break; + } + case ISD::READ_REGISTER: { + if (Subtarget.is64Bit()) + DAG.getContext()->emitError( + "On LA64, only 64-bit registers can be read."); + else + DAG.getContext()->emitError( + "On LA32, only 32-bit registers can be read."); + Results.push_back(DAG.getUNDEF(VT)); + Results.push_back(N->getOperand(0)); + break; + } } } @@ -721,6 +1582,21 @@ Retry2: return SDValue(); } +// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. +static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Src = N->getOperand(0); + if (Src.getOpcode() != LoongArchISD::REVB_2W) + return SDValue(); + + return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), + Src.getOperand(0)); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -733,41 +1609,65 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performORCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); + case LoongArchISD::BITREV_W: + return performBITREV_WCombine(N, DAG, DCI, Subtarget); } return SDValue(); } static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, - MachineBasicBlock &MBB, - const TargetInstrInfo &TII) { + MachineBasicBlock *MBB) { if (!ZeroDivCheck) - return &MBB; + return MBB; // Build instructions: + // MBB: // div(or mod) $dst, $dividend, $divisor - // bnez $divisor, 8 - // break 7 + // bnez $divisor, SinkMBB + // BreakMBB: + // 
break 7 // BRK_DIVZERO + // SinkMBB: // fallthrough + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator It = ++MBB->getIterator(); + MachineFunction *MF = MBB->getParent(); + auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); + auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, BreakMBB); + MF->insert(It, SinkMBB); + + // Transfer the remainder of MBB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); MachineOperand &Divisor = MI.getOperand(2); - auto FallThrough = std::next(MI.getIterator()); + Register DivisorReg = Divisor.getReg(); - BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ)) - .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) - .addImm(8); + // MBB: + BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) + .addReg(DivisorReg, getKillRegState(Divisor.isKill())) + .addMBB(SinkMBB); + MBB->addSuccessor(BreakMBB); + MBB->addSuccessor(SinkMBB); + // BreakMBB: // See linux header file arch/loongarch/include/uapi/asm/break.h for the // definition of BRK_DIVZERO. - BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK)) - .addImm(7/*BRK_DIVZERO*/); + BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); + BreakMBB->addSuccessor(SinkMBB); // Clear Divisor's kill flag. 
Divisor.setIsKill(false); - return &MBB; + return SinkMBB; } MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { default: @@ -780,11 +1680,39 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( case LoongArch::DIV_DU: case LoongArch::MOD_D: case LoongArch::MOD_DU: - return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo()); + return insertDivByZeroTrap(MI, BB); break; + case LoongArch::WRFCSR: { + BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), + LoongArch::FCSR0 + MI.getOperand(0).getImm()) + .addReg(MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return BB; + } + case LoongArch::RDFCSR: { + MachineInstr *ReadFCSR = + BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), + MI.getOperand(0).getReg()) + .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); + ReadFCSR->getOperand(1).setIsUndef(); + MI.eraseFromParent(); + return BB; + } } } +bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (!Subtarget.hasUAL()) + return false; + + // TODO: set reasonable speed number. + if (Fast) + *Fast = 1; + return true; +} + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((LoongArchISD::NodeType)Opcode) { case LoongArchISD::FIRST_NUMBER: @@ -797,6 +1725,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { // TODO: Add more target-dependent nodes later. 
NODE_NAME_CASE(CALL) NODE_NAME_CASE(RET) + NODE_NAME_CASE(TAIL) NODE_NAME_CASE(SLL_W) NODE_NAME_CASE(SRA_W) NODE_NAME_CASE(SRL_W) @@ -805,6 +1734,42 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MOVGR2FR_W_LA64) NODE_NAME_CASE(MOVFR2GR_S_LA64) NODE_NAME_CASE(FTINT) + NODE_NAME_CASE(REVB_2H) + NODE_NAME_CASE(REVB_2W) + NODE_NAME_CASE(BITREV_4B) + NODE_NAME_CASE(BITREV_W) + NODE_NAME_CASE(ROTR_W) + NODE_NAME_CASE(ROTL_W) + NODE_NAME_CASE(CLZ_W) + NODE_NAME_CASE(CTZ_W) + NODE_NAME_CASE(DBAR) + NODE_NAME_CASE(IBAR) + NODE_NAME_CASE(BREAK) + NODE_NAME_CASE(SYSCALL) + NODE_NAME_CASE(CRC_W_B_W) + NODE_NAME_CASE(CRC_W_H_W) + NODE_NAME_CASE(CRC_W_W_W) + NODE_NAME_CASE(CRC_W_D_W) + NODE_NAME_CASE(CRCC_W_B_W) + NODE_NAME_CASE(CRCC_W_H_W) + NODE_NAME_CASE(CRCC_W_W_W) + NODE_NAME_CASE(CRCC_W_D_W) + NODE_NAME_CASE(CSRRD) + NODE_NAME_CASE(CSRWR) + NODE_NAME_CASE(CSRXCHG) + NODE_NAME_CASE(IOCSRRD_B) + NODE_NAME_CASE(IOCSRRD_H) + NODE_NAME_CASE(IOCSRRD_W) + NODE_NAME_CASE(IOCSRRD_D) + NODE_NAME_CASE(IOCSRWR_B) + NODE_NAME_CASE(IOCSRWR_H) + NODE_NAME_CASE(IOCSRWR_W) + NODE_NAME_CASE(IOCSRWR_D) + NODE_NAME_CASE(CPUCFG) + NODE_NAME_CASE(MOVGR2FCSR) + NODE_NAME_CASE(MOVFCSR2GR) + NODE_NAME_CASE(CACOP_D) + NODE_NAME_CASE(CACOP_W) } #undef NODE_NAME_CASE return nullptr; @@ -813,46 +1778,226 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// -// FIXME: Now, we only support CallingConv::C with fixed arguments which are -// passed with integer or floating-point registers. + +// Eight general-purpose registers a0-a7 used for passing integer arguments, +// with a0-a1 reused to return values. 
Generally, the GPRs are used to pass +// fixed-point arguments, and floating-point arguments when no FPR is available +// or with soft float ABI. const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7, LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11}; +// Eight floating-point registers fa0-fa7 used for passing floating-point +// arguments, and fa0-fa1 are also used to return values. const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, LoongArch::F3, LoongArch::F4, LoongArch::F5, LoongArch::F6, LoongArch::F7}; +// FPR32 and FPR64 alias each other. const MCPhysReg ArgFPR64s[] = { LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; +// Pass a 2*GRLen argument that has been split into two GRLen values through +// registers or the stack as necessary. +static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, + CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, + unsigned ValNo2, MVT ValVT2, MVT LocVT2, + ISD::ArgFlagsTy ArgFlags2) { + unsigned GRLenInBytes = GRLen / 8; + if (Register Reg = State.AllocateReg(ArgGPRs)) { + // At least one half can be passed via register. + State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, + VA1.getLocVT(), CCValAssign::Full)); + } else { + // Both halves must be passed on the stack, with proper alignment. + Align StackAlign = + std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign()); + State.addLoc( + CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), + State.AllocateStack(GRLenInBytes, StackAlign), + VA1.getLocVT(), CCValAssign::Full)); + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), + LocVT2, CCValAssign::Full)); + return false; + } + if (Register Reg = State.AllocateReg(ArgGPRs)) { + // The second half can also be passed via register. 
+ State.addLoc( + CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); + } else { + // The second half is passed via the stack, without additional alignment. + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), + LocVT2, CCValAssign::Full)); + } + return false; +} + // Implements the LoongArch calling convention. Returns true upon failure. -static bool CC_LoongArch(unsigned ValNo, MVT ValVT, - CCValAssign::LocInfo LocInfo, CCState &State) { - // Allocate to a register if possible. +static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + unsigned ValNo, MVT ValVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State, bool IsFixed, bool IsRet, + Type *OrigTy) { + unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); + assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen"); + MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; + MVT LocVT = ValVT; + + // Any return value split into more than two values can't be returned + // directly. + if (IsRet && ValNo > 1) + return true; + + // If passing a variadic argument, or if no FPR is available. + bool UseGPRForFloat = true; + + switch (ABI) { + default: + llvm_unreachable("Unexpected ABI"); + case LoongArchABI::ABI_ILP32S: + case LoongArchABI::ABI_ILP32F: + case LoongArchABI::ABI_LP64F: + report_fatal_error("Unimplemented ABI"); + break; + case LoongArchABI::ABI_ILP32D: + case LoongArchABI::ABI_LP64D: + UseGPRForFloat = !IsFixed; + break; + case LoongArchABI::ABI_LP64S: + break; + } + + // FPR32 and FPR64 alias each other. 
+ if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) + UseGPRForFloat = true; + + if (UseGPRForFloat && ValVT == MVT::f32) { + LocVT = GRLenVT; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { + // TODO: Handle passing f64 on LA32 with D feature. + report_fatal_error("Passing f64 with GPR on LA32 is undefined"); + } + + // If this is a variadic argument, the LoongArch calling convention requires + // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 + // byte alignment. An aligned register should be used regardless of whether + // the original argument was split during legalisation or not. The argument + // will not be passed by registers if the original type is larger than + // 2*GRLen, so the register alignment rule does not apply. + unsigned TwoGRLenInBytes = (2 * GRLen) / 8; + if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && + DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) { + unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); + // Skip 'odd' register if necessary. + if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) + State.AllocateReg(ArgGPRs); + } + + SmallVectorImpl &PendingLocs = State.getPendingLocs(); + SmallVectorImpl &PendingArgFlags = + State.getPendingArgFlags(); + + assert(PendingLocs.size() == PendingArgFlags.size() && + "PendingLocs and PendingArgFlags out of sync"); + + // Split arguments might be passed indirectly, so keep track of the pending + // values. 
+ if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { + LocVT = GRLenVT; + LocInfo = CCValAssign::Indirect; + PendingLocs.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + PendingArgFlags.push_back(ArgFlags); + if (!ArgFlags.isSplitEnd()) { + return false; + } + } + + // If the split argument only had two elements, it should be passed directly + // in registers or on the stack. + if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && + PendingLocs.size() <= 2) { + assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); + // Apply the normal calling convention rules to the first half of the + // split argument. + CCValAssign VA = PendingLocs[0]; + ISD::ArgFlagsTy AF = PendingArgFlags[0]; + PendingLocs.clear(); + PendingArgFlags.clear(); + return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, + ArgFlags); + } + + // Allocate to a register if possible, or else a stack slot. Register Reg; + unsigned StoreSizeBytes = GRLen / 8; + Align StackAlign = Align(GRLen / 8); - if (ValVT == MVT::f32) + if (ValVT == MVT::f32 && !UseGPRForFloat) Reg = State.AllocateReg(ArgFPR32s); - else if (ValVT == MVT::f64) + else if (ValVT == MVT::f64 && !UseGPRForFloat) Reg = State.AllocateReg(ArgFPR64s); else Reg = State.AllocateReg(ArgGPRs); + + unsigned StackOffset = + Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); + + // If we reach this point and PendingLocs is non-empty, we must be at the + // end of a split argument that must be passed indirectly. 
+ if (!PendingLocs.empty()) { + assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); + assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); + for (auto &It : PendingLocs) { + if (Reg) + It.convertToReg(Reg); + else + It.convertToMem(StackOffset); + State.addLoc(It); + } + PendingLocs.clear(); + PendingArgFlags.clear(); + return false; + } + assert((!UseGPRForFloat || LocVT == GRLenVT) && + "Expected an GRLenVT at this stage"); + if (Reg) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo)); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - // TODO: Handle arguments passed without register. - return true; + // When a floating-point value is passed on the stack, no bit-cast is needed. + if (ValVT.isFloatingPoint()) { + LocVT = ValVT; + LocInfo = CCValAssign::Full; + } + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + return false; } void LoongArchTargetLowering::analyzeInputArgs( - CCState &CCInfo, const SmallVectorImpl &Ins, + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, bool IsRet, LoongArchCCAssignFn Fn) const { + FunctionType *FType = MF.getFunction().getFunctionType(); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { MVT ArgVT = Ins[i].VT; - - if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { + Type *ArgTy = nullptr; + if (IsRet) + ArgTy = FType->getReturnType(); + else if (Ins[i].isOrigArg()) + ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); + LoongArchABI::ABI ABI = + MF.getSubtarget().getTargetABI(); + if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, + CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'); llvm_unreachable(""); @@ -861,12 +2006,16 @@ void LoongArchTargetLowering::analyzeInputArgs( } void LoongArchTargetLowering::analyzeOutputArgs( - CCState &CCInfo, const SmallVectorImpl &Outs, 
- LoongArchCCAssignFn Fn) const { + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Outs, bool IsRet, + CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT ArgVT = Outs[i].VT; - - if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { + Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; + LoongArchABI::ABI ABI = + MF.getSubtarget().getTargetABI(); + if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, + CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"); llvm_unreachable(""); @@ -874,17 +2023,126 @@ void LoongArchTargetLowering::analyzeOutputArgs( } } +// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect +// values. +static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, + const CCValAssign &VA, const SDLoc &DL) { + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + break; + case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) + Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); + break; + } + return Val; +} + static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL, const LoongArchTargetLowering &TLI) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); EVT LocVT = VA.getLocVT(); + SDValue Val; const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); Register VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); + Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + + return convertLocVTToValVT(DAG, Val, VA, DL); +} + +// The caller is responsible for loading the full value if the 
argument is +// passed with CCValAssign::Indirect. +static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + EVT ValVT = VA.getValVT(); + int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), + /*IsImmutable=*/true); + SDValue FIN = DAG.getFrameIndex( + FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); + + ISD::LoadExtType ExtType; + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + case CCValAssign::BCvt: + ExtType = ISD::NON_EXTLOAD; + break; + } + return DAG.getExtLoad( + ExtType, DL, VA.getLocVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); +} + +static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, + const CCValAssign &VA, const SDLoc &DL) { + EVT LocVT = VA.getLocVT(); + + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) + Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); + break; + } + return Val; +} - return DAG.getCopyFromReg(Chain, DL, VReg, LocVT); +static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i32 || LocVT == MVT::i64) { + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim + // s0 s1 s2 s3 s4 s5 s6 s7 s8 + static const MCPhysReg GPRList[] = { + LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, + LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; + if (unsigned Reg = State.AllocateReg(GPRList)) { + 
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f32) { + // Pass in STG registers: F1, F2, F3, F4 + // fs0,fs1,fs2,fs3 + static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, + LoongArch::F26, LoongArch::F27}; + if (unsigned Reg = State.AllocateReg(FPR32List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f64) { + // Pass in STG registers: D1, D2, D3, D4 + // fs4,fs5,fs6,fs7 + static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, + LoongArch::F30_64, LoongArch::F31_64}; + if (unsigned Reg = State.AllocateReg(FPR64List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + report_fatal_error("No registers left in GHC calling convention"); + return true; } // Transform physical registers into virtual registers. @@ -899,21 +2157,176 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( default: llvm_unreachable("Unsupported calling convention"); case CallingConv::C: + case CallingConv::Fast: break; + case CallingConv::GHC: + if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] || + !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD]) + report_fatal_error( + "GHC calling convention requires the F and D extensions"); } + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT GRLenVT = Subtarget.getGRLenVT(); + unsigned GRLenInBytes = Subtarget.getGRLen() / 8; + // Used with varargs to accumulate store chains. + std::vector OutChains; + // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeInputArgs(CCInfo, Ins, CC_LoongArch); + if (CallConv == CallingConv::GHC) + CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC); + else + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue; + if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); + else + ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); + if (VA.getLocInfo() == CCValAssign::Indirect) { + // If the original argument was split and passed by reference, we need to + // load all parts of it here (using the same address). + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, + MachinePointerInfo())); + unsigned ArgIndex = Ins[i].OrigArgIndex; + unsigned ArgPartOffset = Ins[i].PartOffset; + assert(ArgPartOffset == 0); + while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + CCValAssign &PartVA = ArgLocs[i + 1]; + unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; + SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + ++i; + } + continue; + } + InVals.push_back(ArgValue); + } + + if (IsVarArg) { + ArrayRef ArgRegs = makeArrayRef(ArgGPRs); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + const TargetRegisterClass *RC = &LoongArch::GPRRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + auto *LoongArchFI = MF.getInfo(); + + // Offset of the first variable argument from stack pointer, and size of + // the vararg save area. For now, the varargs save area is either zero or + // large enough to hold a0-a7. 
+ int VaArgOffset, VarArgsSaveSize; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. + if (ArgRegs.size() == Idx) { + VaArgOffset = CCInfo.getNextStackOffset(); + VarArgsSaveSize = 0; + } else { + VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); + VaArgOffset = -VarArgsSaveSize; + } + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); + LoongArchFI->setVarArgsFrameIndex(FI); + + // If saving an odd number of registers then create an extra stack slot to + // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures + // offsets to even-numbered registered remain 2*GRLen-aligned. + if (Idx % 2) { + MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes, + true); + VarArgsSaveSize += GRLenInBytes; + } + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegs.size(); + ++I, VaArgOffset += GRLenInBytes) { + const Register Reg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(ArgRegs[I], Reg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT); + FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + cast(Store.getNode()) + ->getMemOperand() + ->setValue((Value *)nullptr); + OutChains.push_back(Store); + } + LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); + } - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this)); + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens for vararg functions. 
+ if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } return Chain; } +bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { + return CI->isTailCall(); +} + +// Check whether the call is eligible for tail call optimization. +bool LoongArchTargetLowering::isEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + const SmallVectorImpl &ArgLocs) const { + + auto CalleeCC = CLI.CallConv; + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + auto CallerCC = Caller.getCallingConv(); + + // Do not tail call opt if the stack is used to pass parameters. + if (CCInfo.getNextStackOffset() != 0) + return false; + + // Do not tail call opt if any parameters need to be passed indirectly. + for (auto &VA : ArgLocs) + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + + // Do not tail call opt if either caller or callee uses struct return + // semantics. + auto IsCallerStructRet = Caller.hasStructRetAttr(); + auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); + if (IsCallerStructRet || IsCalleeStructRet) + return false; + + // Do not tail call opt if either the callee or caller has a byval argument. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; + + // The callee has to preserve all registers the caller needs to preserve. 
+ const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + return false; + } + return true; +} + +static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { + return DAG.getDataLayout().getPrefTypeAlign( + VT.getTypeForEVT(*DAG.getContext())); +} + // Lower a call to a callseq_start + CALL + callseq_end chain, and add input // and output parameter nodes. SDValue @@ -929,10 +2342,8 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); - CLI.IsTailCall = false; - - if (IsVarArg) - report_fatal_error("LowerCall with varargs not implemented"); + MVT GRLenVT = Subtarget.getGRLenVT(); + bool &IsTailCall = CLI.IsTailCall; MachineFunction &MF = DAG.getMachineFunction(); @@ -940,38 +2351,135 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch); + if (CallConv == CallingConv::GHC) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); + else + analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); + + // Check if it's really possible to do a tail call. + if (IsTailCall) + IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); + + if (IsTailCall) + ++NumTailCalls; + else if (CLI.CB && CLI.CB->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = ArgCCInfo.getNextStackOffset(); - for (auto &Arg : Outs) { - if (!Arg.Flags.isByVal()) + // Create local copies for byval args. + SmallVector ByValArgs; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!Flags.isByVal()) continue; - report_fatal_error("Passing arguments byval not implemented"); + + SDValue Arg = OutVals[i]; + unsigned Size = Flags.getByValSize(); + Align Alignment = Flags.getNonZeroByValAlign(); + + int FI = + MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); + + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, + /*IsVolatile=*/false, + /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall, + MachinePointerInfo(), MachinePointerInfo()); + ByValArgs.push_back(FIPtr); } - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. SmallVector> RegsToPass; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + SmallVector MemOpChains; + SDValue StackPtr; + for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue ArgValue = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; // Promote the value if needed. - // For now, only handle fully promoted arguments. - if (VA.getLocInfo() != CCValAssign::Full) - report_fatal_error("Unknown loc info"); + // For now, only handle fully promoted and indirect arguments. + if (VA.getLocInfo() == CCValAssign::Indirect) { + // Store the argument in a stack slot and pass its address. 
+ Align StackAlign = + std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), + getPrefTypeAlign(ArgValue.getValueType(), DAG)); + TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); + // If the original argument was split and passed by reference, we need to + // store the required parts of it here (and pass just one address). + unsigned ArgIndex = Outs[i].OrigArgIndex; + unsigned ArgPartOffset = Outs[i].PartOffset; + assert(ArgPartOffset == 0); + // Calculate the total size to store. We don't have access to what we're + // actually storing other than performing the loop and collecting the + // info. + SmallVector> Parts; + while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[i + 1]; + unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; + SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); + EVT PartVT = PartValue.getValueType(); + + StoredSize += PartVT.getStoreSize(); + StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); + Parts.push_back(std::make_pair(PartValue, Offset)); + ++i; + } + SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); + int FI = cast(SpillSlot)->getIndex(); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + for (const auto &Part : Parts) { + SDValue PartValue = Part.first; + SDValue PartOffset = Part.second; + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); + MemOpChains.push_back( + DAG.getStore(Chain, DL, PartValue, Address, + MachinePointerInfo::getFixedStack(MF, FI))); + } + ArgValue = SpillSlot; + } else { + ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); + } + + // Use local copy if it is a byval arg. + if (Flags.isByVal()) + ArgValue = ByValArgs[j++]; if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. 
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { - report_fatal_error("Passing arguments via the stack not implemented"); + assert(VA.isMemLoc() && "Argument not register or memory"); + assert(!IsTailCall && "Tail call not allowed if stack is used " + "for passing parameters"); + + // Work out the address of the stack slot. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); + + // Emit the store. + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + SDValue Glue; // Build a sequence of copy-to-reg nodes, chained and glued together. @@ -983,11 +2491,20 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. - // FIXME: Add target flags for relocation. - if (GlobalAddressSDNode *S = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT); - else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + if (GlobalAddressSDNode *S = dyn_cast(Callee)) { + const GlobalValue *GV = S->getGlobal(); + unsigned OpFlags = + getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV) + ? LoongArchII::MO_CALL + : LoongArchII::MO_CALL_PLT; + Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal( + *MF.getFunction().getParent(), nullptr) + ? 
LoongArchII::MO_CALL + : LoongArchII::MO_CALL_PLT; + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); + } // The first call operand is the chain and the second is the target address. SmallVector Ops; @@ -999,11 +2516,13 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, for (auto &Reg : RegsToPass) Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + if (!IsTailCall) { + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + } // Glue the call to the argument copies, if any. if (Glue.getNode()) @@ -1012,6 +2531,11 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (IsTailCall) { + MF.getFrameInfo().setHasTailCall(); + return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); + } + Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); @@ -1024,17 +2548,20 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch); + analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); // Copy all of the result registers out of their specified physreg. 
for (auto &VA : RVLocs) { // Copy the value out. SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); + // Glue the RetValue to the end of the call sequence. Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); - InVals.push_back(Chain.getValue(0)); + RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); + + InVals.push_back(RetValue); } return Chain; @@ -1043,9 +2570,18 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, bool LoongArchTargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { - // Any return value split in to more than two values can't be returned - // directly. - return Outs.size() <= 2; + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + LoongArchABI::ABI ABI = + MF.getSubtarget().getTargetABI(); + if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, + Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, + nullptr)) + return false; + } + return true; } SDValue LoongArchTargetLowering::LowerReturn( @@ -1060,8 +2596,10 @@ SDValue LoongArchTargetLowering::LowerReturn( CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - analyzeOutputArgs(CCInfo, Outs, CC_LoongArch); - + analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, + nullptr, CC_LoongArch); + if (CallConv == CallingConv::GHC && !RVLocs.empty()) + report_fatal_error("GHC functions return void only"); SDValue Glue; SmallVector RetOps(1, Chain); @@ -1071,7 +2609,8 @@ SDValue LoongArchTargetLowering::LowerReturn( assert(VA.isRegLoc() && "Can only return in registers!"); // Handle a 'normal' return. 
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue); + SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); // Guarantee that all emitted copies are stuck together. Glue = Chain.getValue(1); @@ -1089,11 +2628,433 @@ SDValue LoongArchTargetLowering::LowerReturn( bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT"); - + // TODO: Maybe need more checks here after vector extension is supported. if (VT == MVT::f32 && !Subtarget.hasBasicF()) return false; if (VT == MVT::f64 && !Subtarget.hasBasicD()) return false; return (Imm.isZero() || Imm.isExactlyValue(+1.0)); } + +bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { + return true; +} + +bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { + return true; +} + +bool LoongArchTargetLowering::shouldInsertFencesForAtomic( + const Instruction *I) const { + if (!Subtarget.is64Bit()) + return isa(I) || isa(I); + + if (isa(I)) + return true; + + // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not + // require fences because we can use amswap_db.[w/d]. + if (isa(I)) { + unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth(); + return (Size == 8 || Size == 16); + } + + return false; +} + +EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &Context, + EVT VT) const { + if (!VT.isVector()) + return getPointerTy(DL); + return VT.changeVectorElementTypeToInteger(); +} + +bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { + // TODO: Support vectors. 
+ return Y.getValueType().isScalarInteger() && !isa(Y); +} + +bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const { + switch (Intrinsic) { + default: + return false; + case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: + case Intrinsic::loongarch_masked_atomicrmw_add_i32: + case Intrinsic::loongarch_masked_atomicrmw_sub_i32: + case Intrinsic::loongarch_masked_atomicrmw_nand_i32: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Align(4); + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; + return true; + // TODO: Add more Intrinsics later. + } +} + +TargetLowering::AtomicExpansionKind +LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // TODO: Add more AtomicRMWInst that needs to be extended. + + // Since floating-point operation requires a non-trivial set of data + // operations, use CmpXChg to expand. 
+ if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; +} + +static Intrinsic::ID +getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, + AtomicRMWInst::BinOp BinOp) { + if (GRLen == 64) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; + case AtomicRMWInst::Add: + return Intrinsic::loongarch_masked_atomicrmw_add_i64; + case AtomicRMWInst::Sub: + return Intrinsic::loongarch_masked_atomicrmw_sub_i64; + case AtomicRMWInst::Nand: + return Intrinsic::loongarch_masked_atomicrmw_nand_i64; + case AtomicRMWInst::UMax: + return Intrinsic::loongarch_masked_atomicrmw_umax_i64; + case AtomicRMWInst::UMin: + return Intrinsic::loongarch_masked_atomicrmw_umin_i64; + case AtomicRMWInst::Max: + return Intrinsic::loongarch_masked_atomicrmw_max_i64; + case AtomicRMWInst::Min: + return Intrinsic::loongarch_masked_atomicrmw_min_i64; + // TODO: support other AtomicRMWInst. + } + } + + if (GRLen == 32) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; + case AtomicRMWInst::Add: + return Intrinsic::loongarch_masked_atomicrmw_add_i32; + case AtomicRMWInst::Sub: + return Intrinsic::loongarch_masked_atomicrmw_sub_i32; + case AtomicRMWInst::Nand: + return Intrinsic::loongarch_masked_atomicrmw_nand_i32; + // TODO: support other AtomicRMWInst. 
+ } + } + + llvm_unreachable("Unexpected GRLen\n"); +} + +TargetLowering::AtomicExpansionKind +LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( + AtomicCmpXchgInst *CI) const { + unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; +} + +Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, + Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { + Value *Ordering = + Builder.getIntN(Subtarget.getGRLen(), static_cast(Ord)); + + // TODO: Support cmpxchg on LA32. + Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; + CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); + NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + Type *Tys[] = {AlignedAddr->getType()}; + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( + MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; +} + +Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( + IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, + Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { + unsigned GRLen = Subtarget.getGRLen(); + Value *Ordering = + Builder.getIntN(GRLen, static_cast(AI->getOrdering())); + Type *Tys[] = {AlignedAddr->getType()}; + Function *LlwOpScwLoop = Intrinsic::getDeclaration( + AI->getModule(), + getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); + + if (GRLen == 64) { + Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); + } + 
+ Value *Result; + + // Must pass the shift amount needed to sign extend the loaded value prior + // to performing a signed comparison for min/max. ShiftAmt is the number of + // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which + // is the number of bits to left+right shift the value in order to + // sign-extend. + if (AI->getOperation() == AtomicRMWInst::Min || + AI->getOperation() == AtomicRMWInst::Max) { + const DataLayout &DL = AI->getModule()->getDataLayout(); + unsigned ValWidth = + DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); + Value *SextShamt = + Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt); + Result = Builder.CreateCall(LlwOpScwLoop, + {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + } else { + Result = + Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); + } + + if (GRLen == 64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; +} + +bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +Register LoongArchTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return LoongArch::R4; +} + +Register LoongArchTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return LoongArch::R5; +} + +//===----------------------------------------------------------------------===// +// LoongArch Inline Assembly Support +//===----------------------------------------------------------------------===// + +LoongArchTargetLowering::ConstraintType +LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { + // LoongArch specific constraints in GCC: config/loongarch/constraints.md + // + // 'f': A floating-point register (if 
available). + // 'k': A memory operand whose address is formed by a base register and + // (optionally scaled) index register. + // 'l': A signed 16-bit constant. + // 'm': A memory operand whose address is formed by a base register and + // offset that is suitable for use in instructions with the same + // addressing mode as st.w and ld.w. + // 'I': A signed 12-bit constant (for arithmetic instructions). + // 'J': Integer zero. + // 'K': An unsigned 12-bit constant (for logic instructions). + // "ZB": An address that is held in a general-purpose register. The offset is + // zero. + // "ZC": A memory operand whose address is formed by a base register and + // offset that is suitable for use in instructions with the same + // addressing mode as ll.w and sc.w. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + return C_RegisterClass; + case 'l': + case 'I': + case 'J': + case 'K': + return C_Immediate; + case 'k': + return C_Memory; + } + } + + if (Constraint == "ZC" || Constraint == "ZB") + return C_Memory; + + // 'm' is handled here. + return TargetLowering::getConstraintType(Constraint); +} + +unsigned LoongArchTargetLowering::getInlineAsmMemConstraint( + StringRef ConstraintCode) const { + return StringSwitch(ConstraintCode) + .Case("k", InlineAsm::Constraint_k) + .Case("ZB", InlineAsm::Constraint_ZB) + .Case("ZC", InlineAsm::Constraint_ZC) + .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); +} + +std::pair +LoongArchTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + // First, see if this is a constraint that directly corresponds to a LoongArch + // register class. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + // TODO: Support fixed vectors up to GRLen? 
+ if (VT.isVector()) + break; + return std::make_pair(0U, &LoongArch::GPRRegClass); + case 'f': + if (Subtarget.hasBasicF() && VT == MVT::f32) + return std::make_pair(0U, &LoongArch::FPR32RegClass); + if (Subtarget.hasBasicD() && VT == MVT::f64) + return std::make_pair(0U, &LoongArch::FPR64RegClass); + break; + default: + break; + } + } + + // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen + // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm + // constraints while the official register name is prefixed with a '$'. So we + // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) + // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is + // case insensitive, so no need to convert the constraint to upper case here. + // + // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly + // decode the usage of register name aliases into their official names. And + // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use + // official register names. + if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) { + bool IsFP = Constraint[2] == 'f'; + std::pair Temp = Constraint.split('$'); + std::pair R; + R = TargetLowering::getRegForInlineAsmConstraint( + TRI, join_items("", Temp.first, Temp.second), VT); + // Match those names to the widest floating point register type available. 
+ if (IsFP) { + unsigned RegNo = R.first; + if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { + if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { + unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; + return std::make_pair(DReg, &LoongArch::FPR64RegClass); + } + } + } + return R; + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +void LoongArchTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector &Ops, + SelectionDAG &DAG) const { + // Currently only support length 1 constraints. + if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'l': + // Validate & create a 16-bit signed immediate operand. + if (auto *C = dyn_cast(Op)) { + uint64_t CVal = C->getSExtValue(); + if (isInt<16>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); + } + return; + case 'I': + // Validate & create a 12-bit signed immediate operand. + if (auto *C = dyn_cast(Op)) { + uint64_t CVal = C->getSExtValue(); + if (isInt<12>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); + } + return; + case 'J': + // Validate & create an integer zero operand. + if (auto *C = dyn_cast(Op)) + if (C->getZExtValue() == 0) + Ops.push_back( + DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT())); + return; + case 'K': + // Validate & create a 12-bit unsigned immediate operand. 
+ if (auto *C = dyn_cast(Op)) { + uint64_t CVal = C->getZExtValue(); + if (isUInt<12>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); + } + return; + default: + break; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +#define GET_REGISTER_MATCHER +#include "LoongArchGenAsmMatcher.inc" + +Register +LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + std::pair Name = StringRef(RegName).split('$'); + std::string NewRegName = Name.second.str(); + Register Reg = MatchRegisterAltName(NewRegName); + if (Reg == LoongArch::NoRegister) + Reg = MatchRegisterName(NewRegName); + if (Reg == LoongArch::NoRegister) + report_fatal_error( + Twine("Invalid register name \"" + StringRef(RegName) + "\".")); + BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); + if (!ReservedRegs.test(Reg)) + report_fatal_error(Twine("Trying to obtain non-reserved register \"" + + StringRef(RegName) + "\".")); + return Reg; +} + +bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, + EVT VT, SDValue C) const { + // TODO: Support vectors. + if (!VT.isScalarInteger()) + return false; + + // Omit the optimization if the data size exceeds GRLen. + if (VT.getSizeInBits() > Subtarget.getGRLen()) + return false; + + // Break MUL into (SLLI + ADD/SUB) or ALSL. 
+ if (auto *ConstNode = dyn_cast(C.getNode())) { + const APInt &Imm = ConstNode->getAPIntValue(); + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || + (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) + return true; + } + + return false; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 141f1fd3a55db1e5a21d07d41a0b7bc4c138cd11..f6a2b2dfce2220462ae967d66fef98e3ea2a54f3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -29,21 +29,88 @@ enum NodeType : unsigned { // TODO: add more LoongArchISDs CALL, RET, + TAIL, + // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. SLL_W, SRA_W, SRL_W, + ROTL_W, + ROTR_W, + // FPR<->GPR transfer operations MOVGR2FR_W_LA64, MOVFR2GR_S_LA64, + MOVFCSR2GR, + MOVGR2FCSR, FTINT, + // Bit counting operations + CLZ_W, + CTZ_W, + BSTRINS, BSTRPICK, + // Byte-swapping and bit-reversal + REVB_2H, + REVB_2W, + BITREV_4B, + BITREV_W, + + // Intrinsic operations start ============================================ + BREAK, + CACOP_D, + CACOP_W, + DBAR, + IBAR, + SYSCALL, + + // CRC check operations + CRC_W_B_W, + CRC_W_H_W, + CRC_W_W_W, + CRC_W_D_W, + CRCC_W_B_W, + CRCC_W_H_W, + CRCC_W_W_W, + CRCC_W_D_W, + + CSRRD, + + // Write new value to CSR and return old value. + // Operand 0: A chain pointer. + // Operand 1: The new value to write. + // Operand 2: The address of the required CSR. + // Result 0: The old value of the CSR. + // Result 1: The new chain pointer. + CSRWR, + + // Similar to CSRWR but with a write mask. + // Operand 0: A chain pointer. + // Operand 1: The new value to write. + // Operand 2: The write mask. + // Operand 3: The address of the required CSR. + // Result 0: The old value of the CSR. + // Result 1: The new chain pointer. 
+ CSRXCHG, + + // IOCSR access operations + IOCSRRD_B, + IOCSRRD_W, + IOCSRRD_H, + IOCSRRD_D, + IOCSRWR_B, + IOCSRWR_H, + IOCSRWR_W, + IOCSRWR_D, + + // Read CPU configuration information operation + CPUCFG, + // Intrinsic operations end ============================================= }; } // end namespace LoongArchISD @@ -56,6 +123,8 @@ public: const LoongArchSubtarget &getSubtarget() const { return Subtarget; } + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, @@ -82,21 +151,82 @@ public: SelectionDAG &DAG) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + bool hasAndNot(SDValue Y) const override; + TargetLowering::AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, + Value *AlignedAddr, Value *Incr, + Value *Mask, Value *ShiftAmt, + AtomicOrdering Ord) const override; + + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + TargetLowering::AtomicExpansionKind + shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; + Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, + AtomicCmpXchgInst *CI, + Value *AlignedAddr, Value *CmpVal, + Value *NewVal, Value *Mask, + AtomicOrdering Ord) const override; + + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + Register + 
getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::SIGN_EXTEND; + } + + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; private: /// Target-specific function used to lower LoongArch calling conventions. - typedef bool LoongArchCCAssignFn(unsigned ValNo, MVT ValVT, + typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, + unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, - CCState &State); + ISD::ArgFlagsTy ArgFlags, CCState &State, + bool IsFixed, bool IsReg, Type *OrigTy); - void analyzeInputArgs(CCState &CCInfo, - const SmallVectorImpl &Ins, + void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, bool IsRet, LoongArchCCAssignFn Fn) const; - void analyzeOutputArgs(CCState &CCInfo, + void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Outs, + bool IsRet, CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const; + template + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; + SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, + unsigned Opc) const; + SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, + unsigned Opc) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue 
lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; @@ -104,16 +234,39 @@ private: EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; - bool shouldInsertFencesForAtomic(const Instruction *I) const override { - return isa(I) || isa(I); - } + bool shouldInsertFencesForAtomic(const Instruction *I) const override; + + ConstraintType getConstraintType(StringRef Constraint) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + + bool isEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + const SmallVectorImpl &ArgLocs) const; }; } // end namespace llvm diff --git 
a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index bcbd4b28f3c714f5c6dbabe6a6a5b65e656807bb..0145e3c765875bdf8e34a55709c548c71b0de9f3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -13,6 +13,11 @@ #include "LoongArchInstrInfo.h" #include "LoongArch.h" #include "LoongArchMachineFunctionInfo.h" +#include "LoongArchRegisterInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "MCTargetDesc/LoongArchMatInt.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCInstBuilder.h" using namespace llvm; @@ -21,7 +26,15 @@ using namespace llvm; LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, - LoongArch::ADJCALLSTACKUP) {} + LoongArch::ADJCALLSTACKUP), + STI(STI) {} + +MCInst LoongArchInstrInfo::getNop() const { + return MCInstBuilder(LoongArch::ANDI) + .addReg(LoongArch::R0) + .addReg(LoongArch::R0) + .addImm(0); +} void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -34,6 +47,21 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // GPR->CFR copy. + if (LoongArch::CFRRegClass.contains(DstReg) && + LoongArch::GPRRegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::MOVGR2CF), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + // CFR->GPR copy. + if (LoongArch::GPRRegClass.contains(DstReg) && + LoongArch::CFRRegClass.contains(SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::MOVCF2GR), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + // FPR->FPR copies. 
unsigned Opc; if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) { @@ -68,6 +96,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( Opcode = LoongArch::FST_S; else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) Opcode = LoongArch::FST_D; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoST_CFR; else llvm_unreachable("Can't store this register to stack slot"); @@ -101,6 +131,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot( Opcode = LoongArch::FLD_S; else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) Opcode = LoongArch::FLD_D; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoLD_CFR; else llvm_unreachable("Can't load this register from stack slot"); @@ -113,3 +145,349 @@ void LoongArchInstrInfo::loadRegFromStackSlot( .addImm(0) .addMemOperand(MMO); } + +void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, + uint64_t Val, MachineInstr::MIFlag Flag) const { + Register SrcReg = LoongArch::R0; + + if (!STI.is64Bit() && !isInt<32>(Val)) + report_fatal_error("Should only materialize 32-bit constants for LA32"); + + auto Seq = LoongArchMatInt::generateInstSeq(Val); + assert(!Seq.empty()); + + for (auto &Inst : Seq) { + switch (Inst.Opc) { + case LoongArch::LU12I_W: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) + .addImm(Inst.Imm) + .setMIFlag(Flag); + break; + case LoongArch::ADDI_W: + case LoongArch::ORI: + case LoongArch::LU32I_D: // "rj" is needed due to InstrInfo pattern + case LoongArch::LU52I_D: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(Inst.Imm) + .setMIFlag(Flag); + break; + default: + assert(false && "Unknown insn emitted by LoongArchMatInt"); + } + + // Only the first instruction has $zero as its source. 
+ SrcReg = DstReg; + } +} + +unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + + if (Opcode == TargetOpcode::INLINEASM || + Opcode == TargetOpcode::INLINEASM_BR) { + const MachineFunction *MF = MI.getParent()->getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); + } + return MI.getDesc().getSize(); +} + +MachineBasicBlock * +LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + assert(MI.getDesc().isBranch() && "Unexpected opcode!"); + // The branch target is always the last operand. + return MI.getOperand(MI.getNumExplicitOperands() - 1).getMBB(); +} + +static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, + SmallVectorImpl &Cond) { + // Block ends with fall-through condbranch. + assert(LastInst.getDesc().isConditionalBranch() && + "Unknown conditional branch"); + int NumOp = LastInst.getNumExplicitOperands(); + Target = LastInst.getOperand(NumOp - 1).getMBB(); + + Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode())); + for (int i = 0; i < NumOp - 1; i++) + Cond.push_back(LastInst.getOperand(i)); +} + +bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + TBB = FBB = nullptr; + Cond.clear(); + + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end() || !isUnpredicatedTerminator(*I)) + return false; + + // Count the number of terminators and find the first unconditional or + // indirect branch. 
+ MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end(); + int NumTerminators = 0; + for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J); + J++) { + NumTerminators++; + if (J->getDesc().isUnconditionalBranch() || + J->getDesc().isIndirectBranch()) { + FirstUncondOrIndirectBr = J.getReverse(); + } + } + + // If AllowModify is true, we can erase any terminators after + // FirstUncondOrIndirectBR. + if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) { + while (std::next(FirstUncondOrIndirectBr) != MBB.end()) { + std::next(FirstUncondOrIndirectBr)->eraseFromParent(); + NumTerminators--; + } + I = FirstUncondOrIndirectBr; + } + + // Handle a single unconditional branch. + if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { + TBB = getBranchDestBlock(*I); + return false; + } + + // Handle a single conditional branch. + if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) { + parseCondBranch(*I, TBB, Cond); + return false; + } + + // Handle a conditional branch followed by an unconditional branch. + if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && + I->getDesc().isUnconditionalBranch()) { + parseCondBranch(*std::prev(I), TBB, Cond); + FBB = getBranchDestBlock(*I); + return false; + } + + // Otherwise, we can't handle this. 
+ return true; +} + +bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + switch (BranchOp) { + default: + llvm_unreachable("Unknown branch instruction!"); + case LoongArch::BEQ: + case LoongArch::BNE: + case LoongArch::BLT: + case LoongArch::BGE: + case LoongArch::BLTU: + case LoongArch::BGEU: + return isInt<18>(BrOffset); + case LoongArch::BEQZ: + case LoongArch::BNEZ: + case LoongArch::BCEQZ: + case LoongArch::BCNEZ: + return isInt<23>(BrOffset); + case LoongArch::B: + case LoongArch::PseudoBR: + return isInt<28>(BrOffset); + } +} + +unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + if (BytesRemoved) + *BytesRemoved = 0; + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + + if (!I->getDesc().isBranch()) + return 0; + + // Remove the branch. + if (BytesRemoved) + *BytesRemoved += getInstSizeInBytes(*I); + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) + return 1; + --I; + if (!I->getDesc().isConditionalBranch()) + return 1; + + // Remove the branch. + if (BytesRemoved) + *BytesRemoved += getInstSizeInBytes(*I); + I->eraseFromParent(); + return 2; +} + +// Inserts a branch into the end of the specific MachineBasicBlock, returning +// the number of instructions inserted. +unsigned LoongArchInstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { + if (BytesAdded) + *BytesAdded = 0; + + // Shouldn't be a fall through. + assert(TBB && "insertBranch must not be told to insert a fallthrough"); + assert(Cond.size() <= 3 && Cond.size() != 1 && + "LoongArch branch conditions have at most two components!"); + + // Unconditional branch. 
+ if (Cond.empty()) { + MachineInstr &MI = *BuildMI(&MBB, DL, get(LoongArch::PseudoBR)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 1; + } + + // Either a one or two-way conditional branch. + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (unsigned i = 1; i < Cond.size(); ++i) + MIB.add(Cond[i]); + MIB.addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(*MIB); + + // One-way conditional branch. + if (!FBB) + return 1; + + // Two-way conditional branch. + MachineInstr &MI = *BuildMI(&MBB, DL, get(LoongArch::PseudoBR)).addMBB(FBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 2; +} + +void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const { + assert(RS && "RegScavenger required for long branching"); + assert(MBB.empty() && + "new block should be inserted for expanding unconditional branch"); + assert(MBB.pred_size() == 1); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + auto *LAFI = MF->getInfo(); + + if (!isInt<32>(BrOffset)) + report_fatal_error( + "Branch offsets outside of the signed 32-bit range not supported"); + + Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + auto II = MBB.end(); + + MachineInstr &PCALAU12I = + *BuildMI(MBB, II, DL, get(LoongArch::PCALAU12I), ScratchReg) + .addMBB(&DestBB, LoongArchII::MO_PCREL_HI); + MachineInstr &ADDI = + *BuildMI(MBB, II, DL, + get(STI.is64Bit() ? 
LoongArch::ADDI_D : LoongArch::ADDI_W), + ScratchReg) + .addReg(ScratchReg) + .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); + BuildMI(MBB, II, DL, get(LoongArch::PseudoBRIND)) + .addReg(ScratchReg, RegState::Kill) + .addImm(0); + + RS->enterBasicBlockEnd(MBB); + Register Scav = RS->scavengeRegisterBackwards( + LoongArch::GPRRegClass, PCALAU12I.getIterator(), /*RestoreAfter=*/false, + /*SPAdj=*/0, /*AllowSpill=*/false); + if (Scav != LoongArch::NoRegister) + RS->setRegUsed(Scav); + else { + // When there is no scavenged register, it needs to specify a register. + // Specify t8 register because it won't be used too often. + Scav = LoongArch::R20; + int FrameIndex = LAFI->getBranchRelaxationSpillFrameIndex(); + if (FrameIndex == -1) + report_fatal_error("The function size is incorrectly estimated."); + storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()), + /*SpAdj=*/0, /*FIOperandNum=*/1); + PCALAU12I.getOperand(1).setMBB(&RestoreBB); + ADDI.getOperand(2).setMBB(&RestoreBB); + loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(RestoreBB.back(), + /*SpAdj=*/0, /*FIOperandNum=*/1); + } + MRI.replaceRegWith(ScratchReg, Scav); + MRI.clearVirtRegs(); +} + +static unsigned getOppositeBranchOpc(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Unrecognized conditional branch"); + case LoongArch::BEQ: + return LoongArch::BNE; + case LoongArch::BNE: + return LoongArch::BEQ; + case LoongArch::BEQZ: + return LoongArch::BNEZ; + case LoongArch::BNEZ: + return LoongArch::BEQZ; + case LoongArch::BCEQZ: + return LoongArch::BCNEZ; + case LoongArch::BCNEZ: + return LoongArch::BCEQZ; + case LoongArch::BLT: + return LoongArch::BGE; + case LoongArch::BGE: + return LoongArch::BLT; + case LoongArch::BLTU: + return LoongArch::BGEU; + case LoongArch::BGEU: + return LoongArch::BLTU; + } +} + 
+bool LoongArchInstrInfo::reverseBranchCondition( + SmallVectorImpl &Cond) const { + assert((Cond.size() && Cond.size() <= 3) && "Invalid branch condition!"); + Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); + return false; +} + +std::pair +LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF, 0u); +} + +ArrayRef> +LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace LoongArchII; + // TODO: Add more target flags. + static const std::pair TargetFlags[] = { + {MO_CALL, "loongarch-call"}, + {MO_CALL_PLT, "loongarch-call-plt"}, + {MO_PCREL_HI, "loongarch-pcrel-hi"}, + {MO_PCREL_LO, "loongarch-pcrel-lo"}, + {MO_GOT_PC_HI, "loongarch-got-pc-hi"}, + {MO_GOT_PC_LO, "loongarch-got-pc-lo"}, + {MO_LE_HI, "loongarch-le-hi"}, + {MO_LE_LO, "loongarch-le-lo"}, + {MO_IE_PC_HI, "loongarch-ie-pc-hi"}, + {MO_IE_PC_LO, "loongarch-ie-pc-lo"}, + {MO_LD_PC_HI, "loongarch-ld-pc-hi"}, + {MO_GD_PC_HI, "loongarch-gd-pc-hi"}}; + return makeArrayRef(TargetFlags); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index 0a8c86a5e0c2f4636410f763d57508e771a82ae6..0776609c3af0d26d9a716856bb4328b7b5312de7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -27,6 +27,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { public: explicit LoongArchInstrInfo(LoongArchSubtarget &STI); + MCInst getNop() const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override; @@ -40,6 +42,48 @@ public: MachineBasicBlock::iterator MBBI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + + // Materializes the given integer Val into DstReg. 
+ void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, uint64_t Val, + MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; + + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + const DebugLoc &dl, + int *BytesAdded = nullptr) const override; + + void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset, RegScavenger *RS) const override; + + bool + reverseBranchCondition(SmallVectorImpl &Cond) const override; + + std::pair + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const override; + +protected: + const LoongArchSubtarget &STI; }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index d07d086bd7da882548d99111e9271c42fd08e8c5..f20beee9224b104181eff14c4a223101df0f1191 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -35,6 +35,24 @@ def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3> ]>; +// "VI" means no output and an integer input. 
+def SDT_LoongArchVI : SDTypeProfile<0, 1, [SDTCisVT<0, GRLenVT>]>; + +def SDT_LoongArchCsrrd : SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisVT<1, GRLenVT>]>; +def SDT_LoongArchCsrwr : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, GRLenVT>]>; +def SDT_LoongArchCsrxchg : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<3, GRLenVT>]>; +def SDT_LoongArchIocsrwr : SDTypeProfile<0, 2, [SDTCisInt<0>, + SDTCisSameAs<0, 1>]>; +def SDT_LoongArchMovgr2fcsr : SDTypeProfile<0, 2, [SDTCisVT<0, GRLenVT>, + SDTCisSameAs<0, 1>]>; +def SDT_LoongArchMovfcsr2gr : SDTypeProfile<1, 1, [SDTCisVT<0, GRLenVT>, + SDTCisSameAs<0, 1>]>; + // TODO: Add LoongArch specific DAG Nodes // Target-independent nodes, but with target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, @@ -48,13 +66,81 @@ def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall, SDNPVariadic]>; def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; +def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; +def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>; +def loongarch_crc_w_b_w + : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crc_w_h_w + : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crc_w_w_w + : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crc_w_d_w + : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW, 
[SDNPHasChain]>; +def loongarch_crcc_w_b_w + : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crcc_w_h_w + : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crcc_w_w_w + : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; +def loongarch_crcc_w_d_w + : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; def loongarch_bstrins : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; def loongarch_bstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; +def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>; +def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>; +def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>; +def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>; +def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>; +def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>; +def loongarch_dbar : SDNode<"LoongArchISD::DBAR", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr, + [SDNPHasChain]>; +def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_csrrd : SDNode<"LoongArchISD::CSRRD", SDT_LoongArchCsrrd, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_csrwr : SDNode<"LoongArchISD::CSRWR", SDT_LoongArchCsrwr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_csrxchg : SDNode<"LoongArchISD::CSRXCHG", + SDT_LoongArchCsrxchg, + 
[SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrrd_b : SDNode<"LoongArchISD::IOCSRRD_B", SDTUnaryOp, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrrd_h : SDNode<"LoongArchISD::IOCSRRD_H", SDTUnaryOp, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrrd_w : SDNode<"LoongArchISD::IOCSRRD_W", SDTUnaryOp, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrrd_d : SDNode<"LoongArchISD::IOCSRRD_D", SDTUnaryOp, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrwr_b : SDNode<"LoongArchISD::IOCSRWR_B", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrwr_h : SDNode<"LoongArchISD::IOCSRWR_H", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrwr_w : SDNode<"LoongArchISD::IOCSRWR_W", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; +def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, + [SDNPHasChain]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -75,11 +161,32 @@ class UImmAsmOperand : ImmAsmOperand<"U", width, suffix> { } -def uimm2 : Operand { +// A parse method for "$r*" or "$r*, 0", where the 0 is be silently ignored. +// Only used for "AM*" instructions, in order to be compatible with GAS. +def AtomicMemAsmOperand : AsmOperandClass { + let Name = "AtomicMemAsmOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isGPR"; + let ParserMethod = "parseAtomicMemOp"; +} + +def GPRMemAtomic : RegisterOperand { + let ParserMatchClass = AtomicMemAsmOperand; + let PrintMethod = "printAtomicMemOp"; +} + +// A parameterized register class alternative to i32imm/i64imm from Target.td. 
+def grlenimm : Operand; +def imm32 : Operand { + let ParserMatchClass = ImmAsmOperand<"", 32, "">; +} + +def uimm2 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<2>; } -def uimm2_plus1 : Operand { +def uimm2_plus1 : Operand, + ImmLeaf(Imm - 1);}]> { let ParserMatchClass = UImmAsmOperand<2, "plus1">; let EncoderMethod = "getImmOpValueSub1"; let DecoderMethod = "decodeUImmOperand<2, 1>"; @@ -97,28 +204,52 @@ def uimm6 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<6>; } -def uimm8 : Operand { +def uimm8 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<8>; } -def uimm12 : Operand, ImmLeaf(Imm);}]> { +class UImm12Operand : Operand, + ImmLeaf (Imm);}]> { + let DecoderMethod = "decodeUImmOperand<12>"; +} + +def uimm12 : UImm12Operand { let ParserMatchClass = UImmAsmOperand<12>; } -def uimm14 : Operand { +def uimm12_ori : UImm12Operand { + let ParserMatchClass = UImmAsmOperand<12, "ori">; +} + +def uimm14 : Operand, + ImmLeaf (Imm);}]> { let ParserMatchClass = UImmAsmOperand<14>; } -def uimm15 : Operand { +def uimm15 : Operand, + ImmLeaf (Imm);}]> { let ParserMatchClass = UImmAsmOperand<15>; } -def simm12 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<12>; +class SImm12Operand : Operand, + ImmLeaf (Imm);}]> { let DecoderMethod = "decodeSImmOperand<12>"; } -def simm14_lsl2 : Operand { +def simm12 : SImm12Operand { + let ParserMatchClass = SImmAsmOperand<12>; +} + +def simm12_addlike : SImm12Operand { + let ParserMatchClass = SImmAsmOperand<12, "addlike">; +} + +def simm12_lu52id : SImm12Operand { + let ParserMatchClass = SImmAsmOperand<12, "lu52id">; +} + +def simm14_lsl2 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<14, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<14, 2>"; @@ -142,23 +273,74 @@ def simm16_lsl2_br : Operand { let DecoderMethod = "decodeSImmOperand<16, 2>"; } -def simm20 : Operand { - let ParserMatchClass = 
SImmAsmOperand<20>; +class SImm20Operand : Operand { let DecoderMethod = "decodeSImmOperand<20>"; } +def simm20 : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20>; +} + +def simm20_pcalau12i : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "pcalau12i">; +} + +def simm20_lu12iw : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "lu12iw">; +} + +def simm20_lu32id : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "lu32id">; +} + def simm21_lsl2 : Operand { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<21, 2>"; } -def simm26_lsl2 : Operand { - let ParserMatchClass = SImmAsmOperand<26, "lsl2">; +def SImm26OperandB: AsmOperandClass { + let Name = "SImm26OperandB"; + let PredicateMethod = "isSImm26Operand"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidSImm26Operand"; + let ParserMethod = "parseImmediate"; +} + +// A symbol or an imm used in B/PseudoBR. +def simm26_b : Operand { + let ParserMatchClass = SImm26OperandB; + let EncoderMethod = "getImmOpValueAsr2"; + let DecoderMethod = "decodeSImmOperand<26, 2>"; +} + +def SImm26OperandBL: AsmOperandClass { + let Name = "SImm26OperandBL"; + let PredicateMethod = "isSImm26Operand"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidSImm26Operand"; + let ParserMethod = "parseSImm26Operand"; +} + +// A symbol or an imm used in BL/PseudoCALL/PseudoTAIL. +def simm26_symbol : Operand { + let ParserMatchClass = SImm26OperandBL; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<26, 2>"; } +def BareSymbol : AsmOperandClass { + let Name = "BareSymbol"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidBareSymbol"; + let ParserMethod = "parseImmediate"; +} + +// A bare symbol used in "PseudoLA_*" instructions. 
+def bare_symbol : Operand { + let ParserMatchClass = BareSymbol; +} + // Standalone (codegen-only) immleaf patterns. // A 12-bit signed immediate plus one where the imm range will be [-2047, 2048]. @@ -176,18 +358,19 @@ def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>; def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>; def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; -def CallSymbol: AsmOperandClass { - let Name = "CallSymbol"; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isImm"; -} - -// A bare symbol used in call only. -def call_symbol : Operand { - let ParserMatchClass = CallSymbol; -} +// Return an immediate subtracted from 32. +def ImmSubFrom32 : SDNodeXFormgetTargetConstant(32 - N->getZExtValue(), SDLoc(N), + N->getValueType(0)); +}]>; def BaseAddr : ComplexPattern; +def NonFIBaseAddr : ComplexPattern; + +def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa), + (fma node:$fj, node:$fk, node:$fa), [{ + return N->getFlags().hasNoSignedZeros(); +}]>; //===----------------------------------------------------------------------===// // Instruction Formats @@ -245,7 +428,7 @@ class BrCCZ_1RI21 op, string opstr> let isTerminator = 1; } class Br_I26 op, string opstr> - : FmtI26 { + : FmtI26 { let isBranch = 1; let isTerminator = 1; } @@ -254,7 +437,7 @@ let mayLoad = 1 in { class LOAD_3R op, string opstr> : Fmt3R; class LOAD_2RI12 op, string opstr> - : Fmt2RI12; class LOAD_2RI14 op, string opstr> : Fmt2RI14 op, string opstr> : Fmt3R; class STORE_2RI12 op, string opstr> - : Fmt2RI12; class STORE_2RI14 op, string opstr> : Fmt2RI14; } // mayStore = 1 -let mayLoad = 1, mayStore = 1 in +let mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in class AM_3R op, string opstr> - : Fmt3R; + : Fmt3R; let mayLoad = 1 in class LLBase op, string opstr> @@ -300,16 +484,16 @@ class IOCSRWR op, string opstr> // Arithmetic Operation Instructions def ADD_W : ALU_3R<0b00000000000100000, "add.w">; 
def SUB_W : ALU_3R<0b00000000000100010, "sub.w">; -def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12>; +def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12_addlike>; def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>; -def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20>; +def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20_lu12iw>; def SLT : ALU_3R<0b00000000000100100, "slt">; def SLTU : ALU_3R<0b00000000000100101, "sltu">; def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>; def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>; def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>; def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>; -def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20>; +def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20_pcalau12i>; def AND : ALU_3R<0b00000000000101001, "and">; def OR : ALU_3R<0b00000000000101010, "or">; def NOR : ALU_3R<0b00000000000101000, "nor">; @@ -317,7 +501,7 @@ def XOR : ALU_3R<0b00000000000101011, "xor">; def ANDN : ALU_3R<0b00000000000101101, "andn">; def ORN : ALU_3R<0b00000000000101100, "orn">; def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>; -def ORI : ALU_2RI12<0b0000001110, "ori", uimm12>; +def ORI : ALU_2RI12<0b0000001110, "ori", uimm12_ori>; def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; @@ -373,8 +557,8 @@ def BEQZ : BrCCZ_1RI21<0b010000, "beqz">; def BNEZ : BrCCZ_1RI21<0b010001, "bnez">; def B : Br_I26<0b010100, "b">; -let isCall = 1 in -def BL : FmtI26<0b010101, (outs), (ins simm26_lsl2:$imm26), "bl", "$imm26">; +let isCall = 1, Defs=[R1] in +def BL : FmtI26<0b010101, (outs), (ins simm26_symbol:$imm26), "bl", "$imm26">; def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd), (ins GPR:$rj, simm16_lsl2:$imm16), "jirl", "$rd, $rj, $imm16">; @@ -406,6 +590,10 @@ def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">; def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, 
"rdtimeh.w">; def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">; +// Cache Maintenance Instructions +def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", + "$op, $rj, $imm12">; + /// LA64 instructions let Predicates = [IsLA64] in { @@ -413,16 +601,16 @@ let Predicates = [IsLA64] in { // Arithmetic Operation Instructions for 64-bits def ADD_D : ALU_3R<0b00000000000100001, "add.d">; def SUB_D : ALU_3R<0b00000000000100011, "sub.d">; -def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12>; +def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12_addlike>; def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>; def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>; def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>; let Constraints = "$rd = $dst" in { def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst), - (ins GPR:$rd, simm20:$imm20), "lu32i.d", + (ins GPR:$rd, simm20_lu32id:$imm20), "lu32i.d", "$rd, $imm20">; } -def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12>; +def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12_lu52id>; def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>; def MUL_D : ALU_3R<0b00000000000111011, "mul.d">; def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; @@ -579,6 +767,8 @@ class PatGprGpr : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; class PatGprGpr_32 : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; +class PatGpr + : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>; class PatGprImm : Pat<(OpNode GPR:$rj, ImmOpnd:$imm), @@ -587,6 +777,12 @@ class PatGprImm_32 : Pat<(sext_inreg (OpNode GPR:$rj, ImmOpnd:$imm), i32), (Inst GPR:$rj, ImmOpnd:$imm)>; +/// Predicates +def AddLike: PatFrags<(ops node:$A, node:$B), + [(add node:$A, node:$B), (or node:$A, node:$B)], [{ + return N->getOpcode() == ISD::ADD || isOrEquivalentToAdd(N); +}]>; + /// Simple arithmetic operations // Match both a plain shift and one where the shift amount is masked (this is @@ -618,6 
+814,8 @@ def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; +def : PatGprGpr; +def : PatGprImm; } // Predicates = [IsLA32] let Predicates = [IsLA64] in { @@ -631,6 +829,14 @@ def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprImm; +def : PatGprImm_32; +def : Pat<(loongarch_rotl_w GPR:$rj, uimm5:$imm), + (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>; +def : Pat<(sext_inreg (loongarch_rotl_w GPR:$rj, uimm5:$imm), i32), + (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>; // TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the // product are used. def : PatGprGpr; @@ -653,6 +859,68 @@ def : PatGprGpr; def : PatGprImm; def : PatGprGpr; def : PatGprImm; +def : Pat<(not GPR:$rj), (NOR GPR:$rj, R0)>; +def : Pat<(not (or GPR:$rj, GPR:$rk)), (NOR GPR:$rj, GPR:$rk)>; +def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>; +def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>; + +/// Traps + +// We lower `trap` to `amswap.w rd:$r0, rk:$r1, rj:$r0`, as this is guaranteed +// to trap with an INE (non-existent on LA32, explicitly documented to INE on +// LA64). And the resulting signal is different from `debugtrap` like on some +// other existing ports so programs/porters might have an easier time. +def PseudoUNIMP : Pseudo<(outs), (ins), [(trap)]>, + PseudoInstExpansion<(AMSWAP_W R0, R1, R0)>; + +// We lower `debugtrap` to `break 0`, as this is guaranteed to exist and work, +// even for LA32 Primary. Also, because so far the ISA does not provide a +// specific trap instruction/kind exclusively for alerting the debugger, +// every other project uses the generic immediate of 0 for this. 
+def : Pat<(debugtrap), (BREAK 0)>; + +/// Bit counting operations + +let Predicates = [IsLA64] in { +def : PatGpr; +def : PatGpr; +def : Pat<(ctlz (not GPR:$rj)), (CLO_D GPR:$rj)>; +def : Pat<(cttz (not GPR:$rj)), (CTO_D GPR:$rj)>; +def : PatGpr; +def : PatGpr; +def : Pat<(loongarch_clzw (not GPR:$rj)), (CLO_W GPR:$rj)>; +def : Pat<(loongarch_ctzw (not GPR:$rj)), (CTO_W GPR:$rj)>; +} // Predicates = [IsLA64] + +let Predicates = [IsLA32] in { +def : PatGpr; +def : PatGpr; +def : Pat<(ctlz (not GPR:$rj)), (CLO_W GPR:$rj)>; +def : Pat<(cttz (not GPR:$rj)), (CTO_W GPR:$rj)>; +} // Predicates = [IsLA32] + +/// FrameIndex calculations +let Predicates = [IsLA32] in { +def : Pat<(AddLike (i32 BaseAddr:$rj), simm12:$imm12), + (ADDI_W (i32 BaseAddr:$rj), simm12:$imm12)>; +} // Predicates = [IsLA32] +let Predicates = [IsLA64] in { +def : Pat<(AddLike (i64 BaseAddr:$rj), simm12:$imm12), + (ADDI_D (i64 BaseAddr:$rj), simm12:$imm12)>; +} // Predicates = [IsLA64] + +/// Shifted addition +let Predicates = [IsLA32] in { +def : Pat<(add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)), + (ALSL_W GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>; +} // Predicates = [IsLA32] +let Predicates = [IsLA64] in { +def : Pat<(add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)), + (ALSL_D GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>; +def : Pat<(loongarch_bstrpick (add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)), + (i64 31), (i64 0)), + (ALSL_WU GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>; +} // Predicates = [IsLA64] /// Shift @@ -755,12 +1023,17 @@ def : BccSwapPat; // condition was calculated elsewhere). 
def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>; +def : Pat<(brcond (GRLenVT (seteq GPR:$rj, 0)), bb:$imm21), + (BEQZ GPR:$rj, bb:$imm21)>; +def : Pat<(brcond (GRLenVT (setne GPR:$rj, 0)), bb:$imm21), + (BNEZ GPR:$rj, bb:$imm21)>; + let isBarrier = 1, isBranch = 1, isTerminator = 1 in -def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>, - PseudoInstExpansion<(B simm26_lsl2:$imm26)>; +def PseudoBR : Pseudo<(outs), (ins simm26_b:$imm26), [(br bb:$imm26)]>, + PseudoInstExpansion<(B simm26_b:$imm26)>; let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in -def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>, +def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; @@ -768,9 +1041,7 @@ def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; let isCall = 1, Defs = [R1] in -def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { - let AsmString = "bl\t$func"; -} +def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; @@ -780,10 +1051,134 @@ def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), [(loongarch_call GPR:$rj)]>, PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; +let isCall = 1, Defs = [R1] in +def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, + PseudoInstExpansion<(JIRL R1, GPR:$rj, + simm16_lsl2:$imm16)>; + let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in +def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>; + +def : Pat<(loongarch_tail (iPTR 
tglobaladdr:$dst)), + (PseudoTAIL tglobaladdr:$dst)>; +def : Pat<(loongarch_tail (iPTR texternalsym:$dst)), + (PseudoTAIL texternalsym:$dst)>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in +def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj), + [(loongarch_tail GPRT:$rj)]>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in +def PseudoB_TAIL : Pseudo<(outs), (ins simm26_b:$imm26)>, + PseudoInstExpansion<(B simm26_b:$imm26)>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in +def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, + simm16_lsl2:$imm16)>; + +/// Load address (la*) macro instructions. + +// Define isCodeGenOnly = 0 to expose them to tablegened assembly parser. +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, + isAsmParserOnly = 1 in { +def PseudoLA_ABS : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.abs", "$dst, $src">; +def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.abs", "$dst, $src">; +def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.pcrel", "$dst, $src">; +def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.pcrel", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +def PseudoLA_TLS_LE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.le", "$dst, $src">; +} +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, + isAsmParserOnly = 1 in { +def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.got", "$dst, $src">; +def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.got", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ie", "$dst, 
$src">; +def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.ie", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ld", "$dst, $src">; +def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.ld", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.gd", "$dst, $src">; +def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), + (ins GPR:$tmp, bare_symbol:$src), [], + "la.tls.gd", "$dst, $tmp, $src">, + Requires<[IsLA64]>; +} + +// Load address inst alias: "la", "la.global" and "la.local". +// Default: +// la = la.global = la.got +// la.local = la.pcrel +// With feature "+la-global-with-pcrel": +// la = la.global = la.pcrel +// With feature "+la-global-with-abs": +// la = la.global = la.abs +// With feature "+la-local-with-abs": +// la.local = la.abs +// With features "+la-global-with-pcrel,+la-global-with-abs"(disorder): +// la = la.global = la.pcrel +// Note: To keep consistent with gnu-as behavior, the "la" can only have one +// register operand. +def : InstAlias<"la $dst, $src", (PseudoLA_GOT GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $src", + (PseudoLA_GOT GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $tmp, $src", + (PseudoLA_GOT_LARGE GPR:$dst, GPR:$tmp, bare_symbol:$src)>; +def : InstAlias<"la.local $dst, $src", + (PseudoLA_PCREL GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.local $dst, $tmp, $src", + (PseudoLA_PCREL_LARGE GPR:$dst, GPR:$tmp, bare_symbol:$src)>; + +// Note: Keep HasLaGlobalWithPcrel before HasLaGlobalWithAbs to ensure +// "la-global-with-pcrel" takes effect when both "la-global-with-pcrel" and +// "la-global-with-abs" are enabled. 
+let Predicates = [HasLaGlobalWithPcrel] in { +def : InstAlias<"la $dst, $src", (PseudoLA_PCREL GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $src", + (PseudoLA_PCREL GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $tmp, $src", + (PseudoLA_PCREL_LARGE GPR:$dst, GPR:$tmp, bare_symbol:$src)>; +} // Predicates = [HasLaGlobalWithPcrel] + +let Predicates = [HasLaGlobalWithAbs] in { +def : InstAlias<"la $dst, $src", (PseudoLA_ABS GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $src", + (PseudoLA_ABS GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.global $dst, $tmp, $src", + (PseudoLA_ABS_LARGE GPR:$dst, GPR:$tmp, bare_symbol:$src)>; +} // Predicates = [HasLaGlobalWithAbs] + +let Predicates = [HasLaLocalWithAbs] in { +def : InstAlias<"la.local $dst, $src", + (PseudoLA_ABS GPR:$dst, bare_symbol:$src)>; +def : InstAlias<"la.local $dst, $tmp, $src", + (PseudoLA_ABS_LARGE GPR:$dst, GPR:$tmp, bare_symbol:$src)>; +} // Predicates = [HasLaLocalWithAbs] + /// BSTRINS and BSTRPICK let Predicates = [IsLA32] in { @@ -800,11 +1195,32 @@ def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd), (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; } // Predicates = [IsLA64] +/// Byte-swapping and bit-reversal + +def : Pat<(loongarch_revb_2h GPR:$rj), (REVB_2H GPR:$rj)>; +def : Pat<(loongarch_bitrev_4b GPR:$rj), (BITREV_4B GPR:$rj)>; + +let Predicates = [IsLA32] in { +def : Pat<(bswap GPR:$rj), (ROTRI_W (REVB_2H GPR:$rj), 16)>; +def : Pat<(bitreverse GPR:$rj), (BITREV_W GPR:$rj)>; +def : Pat<(bswap (bitreverse GPR:$rj)), (BITREV_4B GPR:$rj)>; +def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_4B GPR:$rj)>; +} // Predicates = [IsLA32] + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>; +def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>; +def : Pat<(loongarch_bitrev_w GPR:$rj), (BITREV_W GPR:$rj)>; +def : Pat<(bitreverse GPR:$rj), (BITREV_D GPR:$rj)>; +def : Pat<(bswap (bitreverse GPR:$rj)), 
(BITREV_8B GPR:$rj)>; +def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_8B GPR:$rj)>; +} // Predicates = [IsLA64] + /// Loads multiclass LdPat { def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>; - def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))), + def : Pat<(vt (LoadOp (AddLike BaseAddr:$rj, simm12:$imm12))), (Inst BaseAddr:$rj, simm12:$imm12)>; } @@ -822,13 +1238,31 @@ defm : LdPat; defm : LdPat; } // Predicates = [IsLA64] +// LA64 register-register-addressed loads +let Predicates = [IsLA64] in { +class RegRegLdPat + : Pat<(vt (LoadOp (add NonFIBaseAddr:$rj, GPR:$rk))), + (Inst NonFIBaseAddr:$rj, GPR:$rk)>; + +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +def : RegRegLdPat; +} // Predicates = [IsLA64] + /// Stores multiclass StPat { def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj), (Inst StTy:$rd, BaseAddr:$rj, 0)>; - def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)), + def : Pat<(StoreOp (vt StTy:$rd), (AddLike BaseAddr:$rj, simm12:$imm12)), (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>; } @@ -840,6 +1274,31 @@ defm : StPat; defm : StPat; } // Predicates = [IsLA64] +let Predicates = [IsLA64] in { +def : Pat<(i64 (sextloadi32 (AddLike BaseAddr:$rj, simm14_lsl2:$imm14))), + (LDPTR_W BaseAddr:$rj, simm14_lsl2:$imm14)>; +def : Pat<(i64 (load (AddLike BaseAddr:$rj, simm14_lsl2:$imm14))), + (LDPTR_D BaseAddr:$rj, simm14_lsl2:$imm14)>; +def : Pat<(truncstorei32 (i64 GPR:$rd), + (AddLike BaseAddr:$rj, simm14_lsl2:$imm14)), + (STPTR_W GPR:$rd, BaseAddr:$rj, simm14_lsl2:$imm14)>; +def : Pat<(store (i64 GPR:$rd), (AddLike BaseAddr:$rj, simm14_lsl2:$imm14)), + (STPTR_D GPR:$rd, BaseAddr:$rj, simm14_lsl2:$imm14)>; +} // Predicates = [IsLA64] + +// LA64 register-register-addressed stores +let Predicates = [IsLA64] in { +class RegRegStPat + : Pat<(StoreOp (vt StTy:$rd), (add NonFIBaseAddr:$rj, GPR:$rk)), + 
(Inst StTy:$rd, NonFIBaseAddr:$rj, GPR:$rk)>; + +def : RegRegStPat; +def : RegRegStPat; +def : RegRegStPat; +def : RegRegStPat; +} // Predicates = [IsLA64] + /// Atomic loads and stores def : Pat<(atomic_fence timm, timm), (DBAR 0)>; @@ -848,13 +1307,286 @@ defm : LdPat; defm : LdPat; defm : LdPat; -defm : StPat; -defm : StPat; -defm : StPat, Requires<[IsLA32]>; +class release_seqcst_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getSuccessOrdering(); + return isReleaseOrStronger(Ordering); +}]>; + +class unordered_monotonic_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getSuccessOrdering(); + return !isReleaseOrStronger(Ordering); +}]>; + +def atomic_store_release_seqcst_32 : release_seqcst_store; +def atomic_store_release_seqcst_64 : release_seqcst_store; +def atomic_store_unordered_monotonic_32 + : unordered_monotonic_store; +def atomic_store_unordered_monotonic_64 + : unordered_monotonic_store; + +/// AtomicStores + +multiclass AtomicStPat { + def : Pat<(StoreOp BaseAddr:$ptr, (vt StTy:$val)), + (Inst StTy:$val, BaseAddr:$ptr, 0)>; + def : Pat<(StoreOp (AddLike BaseAddr:$ptr, simm12:$imm12), (vt StTy:$val)), + (Inst StTy:$val, BaseAddr:$ptr, simm12:$imm12)>; +} + +defm : AtomicStPat; +defm : AtomicStPat; +defm : AtomicStPat, + Requires<[IsLA32]>; + +def PseudoAtomicStoreW + : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>, + PseudoInstExpansion<(AMSWAP_DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>; + +def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk), + (PseudoAtomicStoreW GPR:$rj, GPR:$rk)>; + let Predicates = [IsLA64] in { +def PseudoAtomicStoreD + : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>, + PseudoInstExpansion<(AMSWAP_DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>; + +def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk), + (PseudoAtomicStoreD GPR:$rj, GPR:$rk)>; + defm : LdPat; -defm : StPat; -defm : StPat; +defm : 
AtomicStPat; +defm : AtomicStPat; +} // Predicates = [IsLA64] + +/// Atomic Ops + +class PseudoMaskedAM + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 36; +} + +def PseudoMaskedAtomicSwap32 : PseudoMaskedAM; +def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAM; +def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAM; +def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAM; + +class PseudoAM : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$incr, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 24; +} + +def PseudoAtomicSwap32 : PseudoAM; +def PseudoAtomicLoadNand32 : PseudoAM; +def PseudoAtomicLoadNand64 : PseudoAM; +def PseudoAtomicLoadAdd32 : PseudoAM; +def PseudoAtomicLoadSub32 : PseudoAM; +def PseudoAtomicLoadAnd32 : PseudoAM; +def PseudoAtomicLoadOr32 : PseudoAM; +def PseudoAtomicLoadXor32 : PseudoAM; + +multiclass PseudoBinPat { + def : Pat<(!cast(Op#"_monotonic") GPR:$addr, GPR:$incr), + (BinInst GPR:$addr, GPR:$incr, 2)>; + def : Pat<(!cast(Op#"_acquire") GPR:$addr, GPR:$incr), + (BinInst GPR:$addr, GPR:$incr, 4)>; + def : Pat<(!cast(Op#"_release") GPR:$addr, GPR:$incr), + (BinInst GPR:$addr, GPR:$incr, 5)>; + def : Pat<(!cast(Op#"_acq_rel") GPR:$addr, GPR:$incr), + (BinInst GPR:$addr, GPR:$incr, 6)>; + def : Pat<(!cast(Op#"_seq_cst") GPR:$addr, GPR:$incr), + (BinInst GPR:$addr, GPR:$incr, 7)>; +} + +class PseudoMaskedAMUMinUMax + : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), + (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," + "@earlyclobber $scratch2"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 48; +} + +def 
PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax; +def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax; + +class PseudoMaskedAMMinMax + : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), + (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$sextshamt, + grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," + "@earlyclobber $scratch2"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 56; +} + +def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax; +def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + +/// Compare and exchange + +class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 36; +} + +def PseudoCmpXchg32 : PseudoCmpXchg; +def PseudoCmpXchg64 : PseudoCmpXchg; + +def PseudoMaskedCmpXchg32 + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, + grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 44; +} + +class PseudoMaskedAMMinMaxPat + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, + timm:$ordering), + (AMInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, + timm:$ordering)>; + +class AtomicPat + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), + (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; + +let Predicates = [IsLA64] in { +def : AtomicPat; +def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr), + (AMSWAP_DB_W GPR:$incr, GPR:$addr)>; +def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr), + (AMSWAP_DB_D GPR:$incr, GPR:$addr)>; +def : Pat<(atomic_load_add_64 GPR:$rj, GPR:$rk), + (AMADD_DB_D GPR:$rk, GPR:$rj)>; +def : AtomicPat; +def : Pat<(atomic_load_sub_32 GPR:$rj, GPR:$rk), + (AMADD_DB_W (SUB_W 
R0, GPR:$rk), GPR:$rj)>; +def : Pat<(atomic_load_sub_64 GPR:$rj, GPR:$rk), + (AMADD_DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>; +def : AtomicPat; +defm : PseudoBinPat<"atomic_load_nand_64", PseudoAtomicLoadNand64>; +def : AtomicPat; +def : Pat<(atomic_load_add_32 GPR:$rj, GPR:$rk), + (AMADD_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_and_32 GPR:$rj, GPR:$rk), + (AMAND_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_and_64 GPR:$rj, GPR:$rk), + (AMAND_DB_D GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_or_32 GPR:$rj, GPR:$rk), + (AMOR_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_or_64 GPR:$rj, GPR:$rk), + (AMOR_DB_D GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk), + (AMXOR_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk), + (AMXOR_DB_D GPR:$rk, GPR:$rj)>; + +def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk), + (AMMIN_DB_WU GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk), + (AMMIN_DB_DU GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk), + (AMMAX_DB_WU GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk), + (AMMAX_DB_DU GPR:$rk, GPR:$rj)>; + +def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk), + (AMMIN_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk), + (AMMIN_DB_D GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk), + (AMMAX_DB_W GPR:$rk, GPR:$rj)>; +def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk), + (AMMAX_DB_D GPR:$rk, GPR:$rj)>; + +def : AtomicPat; +def : AtomicPat; + +def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), + (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; +def : Pat<(int_loongarch_masked_cmpxchg_i64 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), + (PseudoMaskedCmpXchg32 + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; +def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new), + (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>; + +def : 
PseudoMaskedAMMinMaxPat; +def : PseudoMaskedAMMinMaxPat; +} // Predicates = [IsLA64] + +defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>; + +let Predicates = [IsLA32] in { +def : AtomicPat; +defm : PseudoBinPat<"atomic_swap_32", PseudoAtomicSwap32>; +def : AtomicPat; +def : AtomicPat; +def : AtomicPat; +defm : PseudoBinPat<"atomic_load_add_32", PseudoAtomicLoadAdd32>; +defm : PseudoBinPat<"atomic_load_sub_32", PseudoAtomicLoadSub32>; +defm : PseudoBinPat<"atomic_load_and_32", PseudoAtomicLoadAnd32>; +defm : PseudoBinPat<"atomic_load_or_32", PseudoAtomicLoadOr32>; +defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; +} // Predicates = [IsLA32] + +/// Intrinsics + +def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), + (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; +def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), + (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; +def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; +def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; +def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; +def : Pat<(loongarch_syscall uimm15:$imm15), (SYSCALL uimm15:$imm15)>; + +let Predicates = [IsLA64] in { +// CRC Check Instructions +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; } // Predicates = [IsLA64] /// Other pseudo-instructions @@ -873,6 +1605,41 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), def : InstAlias<"nop", (ANDI R0, R0, 0)>; def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>; +// `ret` is supported since binutils commit 20f2e2686c79a5ac (version 2.40 and +// later). +def : InstAlias<"ret", (JIRL R0, R1, 0)>; +def : InstAlias<"jr $rj", (JIRL R0, GPR:$rj, 0)>; + +// Branches implemented with alias. +// Always output the canonical mnemonic for the pseudo branch instructions. 
+// The GNU tools emit the canonical mnemonic for the branch pseudo instructions +// as well (e.g. "bgt" will be recognised by the assembler but never printed by +// objdump). Match this behaviour by setting a zero weight. +def : InstAlias<"bgt $rj, $rd, $imm16", + (BLT GPR:$rd, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"bgtu $rj, $rd, $imm16", + (BLTU GPR:$rd, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"ble $rj, $rd, $imm16", + (BGE GPR:$rd, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"bleu $rj, $rd, $imm16", + (BGEU GPR:$rd, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"bltz $rd, $imm16", + (BLT GPR:$rd, R0, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"bgtz $rj, $imm16", + (BLT R0, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"blez $rj, $imm16", + (BGE R0, GPR:$rj, simm16_lsl2_br:$imm16), 0>; +def : InstAlias<"bgez $rd, $imm16", + (BGE GPR:$rd, R0, simm16_lsl2_br:$imm16), 0>; + +// Load immediate. +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, + isAsmParserOnly = 1 in { +def PseudoLI_W : Pseudo<(outs GPR:$rd), (ins imm32:$imm), [], + "li.w", "$rd, $imm">; +def PseudoLI_D : Pseudo<(outs GPR:$rd), (ins grlenimm:$imm), [], + "li.d", "$rd, $imm">, Requires<[IsLA64]>; +} //===----------------------------------------------------------------------===// // Basic Floating-Point Instructions @@ -881,6 +1648,13 @@ def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>; include "LoongArchFloat32InstrInfo.td" include "LoongArchFloat64InstrInfo.td" +let Predicates = [HasBasicF], usesCustomInserter = 1 in { + def WRFCSR : Pseudo<(outs), (ins uimm2:$fcsr, GPR:$src), + [(loongarch_movgr2fcsr uimm2:$fcsr, GRLenVT:$src)]>; + def RDFCSR : Pseudo<(outs GPR:$rd), (ins uimm2:$fcsr), + [(set GPR:$rd, (loongarch_movfcsr2gr uimm2:$fcsr))]>; +} + //===----------------------------------------------------------------------===// // Privilege Instructions 
//===----------------------------------------------------------------------===// @@ -908,10 +1682,6 @@ def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">; def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">; } // Predicates = [IsLA64] -// Cache Maintenance Instructions -def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", - "$op, $rj, $imm12">; - // TLB Maintenance Instructions def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">; def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">; @@ -932,3 +1702,36 @@ def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">; def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">; def DBCL : MISC_I15<0b00000000001010101, "dbcl">; def IDLE : MISC_I15<0b00000110010010001, "idle">; + +//===----------------------------------------------------------------------===// +// Privilege Intrinsics +//===----------------------------------------------------------------------===// + +def : Pat<(loongarch_csrrd uimm14:$imm14), (CSRRD uimm14:$imm14)>; +def : Pat<(loongarch_csrwr GPR:$rd, uimm14:$imm14), + (CSRWR GPR:$rd, uimm14:$imm14)>; +def : Pat<(loongarch_csrxchg GPR:$rd, GPR:$rj, uimm14:$imm14), + (CSRXCHG GPR:$rd, GPR:$rj, uimm14:$imm14)>; + +def : Pat<(loongarch_iocsrrd_b GPR:$rj), (IOCSRRD_B GPR:$rj)>; +def : Pat<(loongarch_iocsrrd_h GPR:$rj), (IOCSRRD_H GPR:$rj)>; +def : Pat<(loongarch_iocsrrd_w GPR:$rj), (IOCSRRD_W GPR:$rj)>; + +def : Pat<(loongarch_iocsrwr_b GPR:$rd, GPR:$rj), (IOCSRWR_B GPR:$rd, GPR:$rj)>; +def : Pat<(loongarch_iocsrwr_h GPR:$rd, GPR:$rj), (IOCSRWR_H GPR:$rd, GPR:$rj)>; +def : Pat<(loongarch_iocsrwr_w GPR:$rd, GPR:$rj), (IOCSRWR_W GPR:$rd, GPR:$rj)>; + +def : Pat<(loongarch_cpucfg GPR:$rj), (CPUCFG GPR:$rj)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_iocsrrd_d GPR:$rj), (IOCSRRD_D GPR:$rj)>; +def : Pat<(loongarch_iocsrwr_d GPR:$rd, GPR:$rj), (IOCSRWR_D GPR:$rd, GPR:$rj)>; +def : 
Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), + (ASRTLE_D GPR:$rj, GPR:$rk)>; +def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), + (ASRTGT_D GPR:$rj, GPR:$rk)>; +def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), + (LDDIR GPR:$rj, uimm8:$imm8)>; +def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), + (LDPTE GPR:$rj, uimm8:$imm8)>; +} // Predicates = [IsLA64] diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 488c66f47863c1668e517a6fd2627f68f5e21597..64f08e260381279064a9f9ba1dab68487170919e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -13,6 +13,8 @@ #include "LoongArch.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCExpr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,8 +27,52 @@ using namespace llvm; static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, const AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; + LoongArchMCExpr::VariantKind Kind; - // TODO: Processing target flags. 
+ switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case LoongArchII::MO_None: + Kind = LoongArchMCExpr::VK_LoongArch_None; + break; + case LoongArchII::MO_CALL: + Kind = LoongArchMCExpr::VK_LoongArch_CALL; + break; + case LoongArchII::MO_CALL_PLT: + Kind = LoongArchMCExpr::VK_LoongArch_CALL_PLT; + break; + case LoongArchII::MO_PCREL_HI: + Kind = LoongArchMCExpr::VK_LoongArch_PCALA_HI20; + break; + case LoongArchII::MO_PCREL_LO: + Kind = LoongArchMCExpr::VK_LoongArch_PCALA_LO12; + break; + case LoongArchII::MO_GOT_PC_HI: + Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20; + break; + case LoongArchII::MO_GOT_PC_LO: + Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12; + break; + case LoongArchII::MO_LE_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20; + break; + case LoongArchII::MO_LE_LO: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12; + break; + case LoongArchII::MO_IE_PC_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20; + break; + case LoongArchII::MO_IE_PC_LO: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12; + break; + case LoongArchII::MO_LD_PC_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20; + break; + case LoongArchII::MO_GD_PC_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; + break; + // TODO: Handle more target-flags. 
+ } const MCExpr *ME = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); @@ -35,6 +81,8 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, ME = MCBinaryExpr::createAdd( ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + if (Kind != LoongArchMCExpr::VK_LoongArch_None) + ME = LoongArchMCExpr::create(ME, Kind, Ctx); return MCOperand::createExpr(ME); } @@ -70,9 +118,12 @@ bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, MCOp = lowerSymbolOperand( MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); break; - // TODO: lower special operands case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand( + MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); + break; case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, AP.GetJTISymbol(MO.getIndex()), AP); break; } return true; diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index d4a6c884bc9d2b2320ed72ab4bf3947419a1ab99..47b021e2f78fa80fd61c42bc24f025cdbfcd71f1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -32,6 +32,10 @@ private: /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; + /// FrameIndex of the spill slot when there is no scavenged register in + /// insertIndirectBranch. 
+ int BranchRelaxationSpillFrameIndex = -1; + public: LoongArchMachineFunctionInfo(const MachineFunction &MF) {} @@ -50,6 +54,13 @@ public: unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + + int getBranchRelaxationSpillFrameIndex() { + return BranchRelaxationSpillFrameIndex; + } + void setBranchRelaxationSpillFrameIndex(int Index) { + BranchRelaxationSpillFrameIndex = Index; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index 05902ebb7ba6be84373b2d7da2e87af709458df1..2b667d9045e367f79c39dab4245a405b119b7eae 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -13,7 +13,9 @@ #include "LoongArchRegisterInfo.h" #include "LoongArch.h" +#include "LoongArchInstrInfo.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -36,6 +38,8 @@ const MCPhysReg * LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { auto &Subtarget = MF->getSubtarget(); + if (MF->getFunction().getCallingConv() == CallingConv::GHC) + return CSR_NoRegs_SaveList; switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); @@ -56,6 +60,8 @@ LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { auto &Subtarget = MF.getSubtarget(); + if (CC == CallingConv::GHC) + return CSR_NoRegs_RegMask; switch (Subtarget.getTargetABI()) { default: llvm_unreachable("Unrecognized ABI"); @@ -92,6 +98,13 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->hasBP(MF)) markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp + // FIXME: To avoid generating COPY 
instructions between CFRs, only use $fcc0. + // This is required to work around the fact that COPY instruction between CFRs + // is not provided in LoongArch. + if (MF.getSubtarget().hasBasicF()) + for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg) + markSuperRegs(Reserved, Reg); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -116,9 +129,18 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); MachineInstr &MI = *II; + assert(MI.getOperand(FIOperandNum + 1).isImm() && + "Unexpected FI-consuming insn"); + + MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MI.getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const LoongArchSubtarget &STI = MF.getSubtarget(); + const LoongArchInstrInfo *TII = STI.getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); DebugLoc DL = MI.getDebugLoc(); + bool IsLA64 = STI.is64Bit(); + unsigned MIOpc = MI.getOpcode(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); Register FrameReg; @@ -126,12 +148,60 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) + StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); - // Offsets must be encodable with a 12-bit immediate field. + bool FrameRegIsKill = false; + if (!isInt<12>(Offset.getFixed())) { - report_fatal_error("Frame offsets outside of the signed 12-bit range is " - "not supported currently"); + unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W; + + // The offset won't fit in an immediate, so use a scratch register instead. + // Modify Offset and FrameReg appropriately. 
+ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed()); + if (MIOpc == Addi) { + BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg()) + .addReg(FrameReg) + .addReg(ScratchReg, RegState::Kill); + MI.eraseFromParent(); + return; + } + BuildMI(MBB, II, DL, TII->get(Add), ScratchReg) + .addReg(FrameReg) + .addReg(ScratchReg, RegState::Kill); + Offset = StackOffset::getFixed(0); + FrameReg = ScratchReg; + FrameRegIsKill = true; + } + + // Spill CFRs. + if (MIOpc == LoongArch::PseudoST_CFR) { + Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(LoongArch::MOVCF2GR), ScratchReg) + .add(MI.getOperand(0)); + BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W)) + .addReg(ScratchReg, RegState::Kill) + .addReg(FrameReg) + .addImm(Offset.getFixed()); + MI.eraseFromParent(); + return; + } + + // Reload CFRs. + if (MIOpc == LoongArch::PseudoLD_CFR) { + Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(IsLA64 ? 
LoongArch::LD_D : LoongArch::LD_W), + ScratchReg) + .addReg(FrameReg) + .addImm(Offset.getFixed()); + BuildMI(MBB, II, DL, TII->get(LoongArch::MOVGR2CF)) + .add(MI.getOperand(0)) + .addReg(ScratchReg, RegState::Kill); + MI.eraseFromParent(); + return; } - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameReg, false, false, FrameRegIsKill); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + return; } diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h index cca130c3bc3ac06c8bedddde462980e112d77628..56c5c316c4c8547c6fbca310eee9986d9a0e083d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -44,6 +44,14 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { RegScavenger *RS = nullptr) const override; Register getFrameRegister(const MachineFunction &MF) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td index 2d5ad99f61560de0372df69848eadc4721755c8a..4f0e0697667c0999ca3d40fcf9fdb3402327a8fb 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -97,6 +97,16 @@ def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add let RegInfos = GRLenRI; } +// GPR for indirect tail calls. We can't use callee-saved registers, as they are +// restored to the saved value before the tail call, which would clobber a call +// address. 
+def GPRT : RegisterClass<"LoongArch", [GRLenVT], 32, (add + // a0...a7, t0...t8 + (sequence "R%u", 4, 20) + )> { + let RegInfos = GRLenRI; +} + // Floating point registers let RegAltNameIndices = [RegAliasName] in { diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp index ff84e7c8cc1fb8841f6f13f326f2dd1ef5e74c18..a0136440ec0ee5013fa10c1657964db284fba821 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -12,6 +12,7 @@ #include "LoongArchSubtarget.h" #include "LoongArchFrameLowering.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" using namespace llvm; @@ -27,7 +28,7 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName) { bool Is64Bit = TT.isArch64Bit(); - if (CPU.empty()) + if (CPU.empty() || CPU == "generic") CPU = Is64Bit ? "generic-la64" : "generic-la32"; if (TuneCPU.empty()) @@ -39,8 +40,17 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( GRLen = 64; } - // TODO: ILP32{S,F} LP64{S,F} - TargetABI = Is64Bit ? 
LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; + if (HasLA32 == HasLA64) + report_fatal_error("Please use one feature of 32bit and 64bit."); + + if (Is64Bit && HasLA32) + report_fatal_error("Feature 32bit should be used for loongarch32 target."); + + if (!Is64Bit && HasLA64) + report_fatal_error("Feature 64bit should be used for loongarch64 target."); + + TargetABI = LoongArchABI::computeTargetABI(TT, ABIName); + return *this; } diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index fbe7a176b37117f1c44037ec8dabf9084aae9091..4ff42e3b13ea3c28d460bd0b592c3480e264bcee 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -31,6 +31,7 @@ class StringRef; class LoongArchSubtarget : public LoongArchGenSubtargetInfo { virtual void anchor(); + bool HasLA32 = false; bool HasLA64 = false; bool HasBasicF = false; bool HasBasicD = false; @@ -38,6 +39,10 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { bool HasExtLASX = false; bool HasExtLVZ = false; bool HasExtLBT = false; + bool HasLaGlobalWithPcrel = false; + bool HasLaGlobalWithAbs = false; + bool HasLaLocalWithAbs = false; + bool HasUAL = false; unsigned GRLen = 32; MVT GRLenVT = MVT::i32; LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; @@ -45,6 +50,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { LoongArchInstrInfo InstrInfo; LoongArchRegisterInfo RegInfo; LoongArchTargetLowering TLInfo; + SelectionDAGTargetInfo TSInfo; /// Initializes using the passed in CPU and feature strings so that we can /// use initializer lists for subtarget initialization. 
@@ -73,6 +79,9 @@ public: const LoongArchTargetLowering *getTargetLowering() const override { return &TLInfo; } + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } bool is64Bit() const { return HasLA64; } bool hasBasicF() const { return HasBasicF; } bool hasBasicD() const { return HasBasicD; } @@ -80,6 +89,10 @@ public: bool hasExtLASX() const { return HasExtLASX; } bool hasExtLVZ() const { return HasExtLVZ; } bool hasExtLBT() const { return HasExtLBT; } + bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; } + bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } + bool hasUAL() const { return HasUAL; } MVT getGRLenVT() const { return GRLenVT; } unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 7ba5848e099797d6558b14c2b1c14b0da4012dc4..83963bac7c67ac1da99ee7e610ba5a63613313bc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; @@ -27,6 +28,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { // Register the target. 
RegisterTargetMachine X(getTheLoongArch32Target()); RegisterTargetMachine Y(getTheLoongArch64Target()); + auto *PR = PassRegistry::getPassRegistry(); + initializeLoongArchPreRAExpandPseudoPass(*PR); } static std::string computeDataLayout(const Triple &TT) { @@ -102,6 +105,9 @@ public: void addIRPasses() override; bool addInstSelector() override; + void addPreEmitPass() override; + void addPreEmitPass2() override; + void addPreRegAlloc() override; }; } // end namespace @@ -121,3 +127,16 @@ bool LoongArchPassConfig::addInstSelector() { return false; } + +void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } + +void LoongArchPassConfig::addPreEmitPass2() { + // Schedule the expansion of AtomicPseudos at the last possible moment, + // avoiding the possibility for other passes to break the requirements for + // forward progress in the LL/SC block. + addPass(createLoongArchExpandAtomicPseudoPass()); +} + +void LoongArchPassConfig::addPreRegAlloc() { + addPass(createLoongArchPreRAExpandPseudoPass()); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h index cbd872031a324b1eb495004e61e8a9b37c89f13f..4cdec7ec9164d03c7db9e48d670fe62aef7cefb5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h @@ -15,6 +15,7 @@ #include "LoongArchSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -26,8 +27,8 @@ public: LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, - bool JIT); + Optional CM, + CodeGenOpt::Level OL, bool JIT); ~LoongArchTargetMachine() override; const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt index 
2e1ca69a3e56ea80c6e944d8e85901fd0040e564..6b572a93e38ef1dbd9ec6cb819b4d108aa4f79cc 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt @@ -2,11 +2,14 @@ add_llvm_component_library(LLVMLoongArchDesc LoongArchAsmBackend.cpp LoongArchBaseInfo.cpp LoongArchELFObjectWriter.cpp + LoongArchELFStreamer.cpp LoongArchInstPrinter.cpp LoongArchMCAsmInfo.cpp - LoongArchMCTargetDesc.cpp LoongArchMCCodeEmitter.cpp + LoongArchMCExpr.cpp + LoongArchMCTargetDesc.cpp LoongArchMatInt.cpp + LoongArchTargetStreamer.cpp LINK_COMPONENTS MC diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index 94a068897f8c50693b96a15e255f7b8dfaecbd4e..31d2795aba40bd101d4010f782069952879a38f4 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -11,10 +11,12 @@ //===----------------------------------------------------------------------===// #include "LoongArchAsmBackend.h" +#include "LoongArchFixupKinds.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -22,33 +24,169 @@ using namespace llvm; +Optional +LoongArchAsmBackend::getFixupKind(StringRef Name) const { + if (STI.getTargetTriple().isOSBinFormatELF()) { + auto Type = llvm::StringSwitch(Name) +#define ELF_RELOC(X, Y) .Case(#X, Y) +#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" +#undef ELF_RELOC + .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE) + .Case("BFD_RELOC_32", ELF::R_LARCH_32) + .Case("BFD_RELOC_64", ELF::R_LARCH_64) + .Default(-1u); + if (Type != -1u) + return static_cast(FirstLiteralRelocationKind + Type); + } + return None; +} + +const MCFixupKindInfo & 
+LoongArchAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // LoongArchFixupKinds.h. + // + // {name, offset, bits, flags} + {"fixup_loongarch_b16", 10, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_loongarch_b21", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_loongarch_b26", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_loongarch_abs_hi20", 5, 20, 0}, + {"fixup_loongarch_abs_lo12", 10, 12, 0}, + {"fixup_loongarch_abs64_lo20", 5, 20, 0}, + {"fixup_loongarch_abs64_hi12", 10, 12, 0}, + {"fixup_loongarch_tls_le_hi20", 5, 20, 0}, + {"fixup_loongarch_tls_le_lo12", 10, 12, 0}, + {"fixup_loongarch_tls_le64_lo20", 5, 20, 0}, + {"fixup_loongarch_tls_le64_hi12", 10, 12, 0}, + // TODO: Add more fixup kinds. + }; + + static_assert((array_lengthof(Infos)) == LoongArch::NumTargetFixupKinds, + "Not all fixup kinds added to Infos array"); + + // Fixup kinds from .reloc directive are like R_LARCH_NONE. They + // do not require any extra processing. 
+ if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +static void reportOutOfRangeError(MCContext &Ctx, SMLoc Loc, unsigned N) { + Ctx.reportError(Loc, "fixup value out of range [" + Twine(llvm::minIntN(N)) + + ", " + Twine(llvm::maxIntN(N)) + "]"); +} + +static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + MCContext &Ctx) { + switch (Fixup.getTargetKind()) { + default: + llvm_unreachable("Unknown fixup kind"); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + return Value; + case LoongArch::fixup_loongarch_b16: { + if (!isInt<18>(Value)) + reportOutOfRangeError(Ctx, Fixup.getLoc(), 18); + if (Value % 4) + Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); + return (Value >> 2) & 0xffff; + } + case LoongArch::fixup_loongarch_b21: { + if (!isInt<23>(Value)) + reportOutOfRangeError(Ctx, Fixup.getLoc(), 23); + if (Value % 4) + Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); + return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x1f); + } + case LoongArch::fixup_loongarch_b26: { + if (!isInt<28>(Value)) + reportOutOfRangeError(Ctx, Fixup.getLoc(), 28); + if (Value % 4) + Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); + return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x3ff); + } + case LoongArch::fixup_loongarch_abs_hi20: + case LoongArch::fixup_loongarch_tls_le_hi20: + return (Value >> 12) & 0xfffff; + case LoongArch::fixup_loongarch_abs_lo12: + case LoongArch::fixup_loongarch_tls_le_lo12: + return Value & 0xfff; + case LoongArch::fixup_loongarch_abs64_lo20: + case LoongArch::fixup_loongarch_tls_le64_lo20: + return (Value >> 32) & 0xfffff; + case LoongArch::fixup_loongarch_abs64_hi12: + case 
LoongArch::fixup_loongarch_tls_le64_hi12: + return (Value >> 52) & 0xfff; + } +} + void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { - // TODO: Apply the Value for given Fixup into the provided data fragment. - return; + if (!Value) + return; // Doesn't change encoding. + + MCFixupKind Kind = Fixup.getKind(); + if (Kind >= FirstLiteralRelocationKind) + return; + MCFixupKindInfo Info = getFixupKindInfo(Kind); + MCContext &Ctx = Asm.getContext(); + + // Apply any target-specific value adjustments. + Value = adjustFixupValue(Fixup, Value, Ctx); + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; + + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned I = 0; I != NumBytes; ++I) { + Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff); + } } bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) { - // TODO: Determine which relocation require special processing at linking - // time. - return false; + if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; + switch (Fixup.getTargetKind()) { + default: + return false; + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + return !Target.isAbsolute(); + } } bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { - // Check for byte count not multiple of instruction word size - if (Count % 4 != 0) - return false; + // We mostly follow binutils' convention here: align to 4-byte boundary with a + // 0-fill padding. 
+ OS.write_zeros(Count % 4); - // The nop on LoongArch is andi r0, r0, 0. + // The remainder is now padded with 4-byte nops. + // nop: andi r0, r0, 0 for (; Count >= 4; Count -= 4) - support::endian::write(OS, 0x03400000, support::little); + OS.write("\0\0\x40\x03", 4); return true; } @@ -64,5 +202,5 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TT = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); + return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit(), Options); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h index a5f0b816c972cb85a5a10698fd12acdc9c764a89..dae170b5628e8808722284aaeb760827e3adb21a 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H #include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchFixupKinds.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -22,12 +23,16 @@ namespace llvm { class LoongArchAsmBackend : public MCAsmBackend { + const MCSubtargetInfo &STI; uint8_t OSABI; bool Is64Bit; + const MCTargetOptions &TargetOptions; public: - LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit) - : MCAsmBackend(support::little), OSABI(OSABI), Is64Bit(Is64Bit) {} + LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, + const MCTargetOptions &Options) + : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), + TargetOptions(Options) {} ~LoongArchAsmBackend() override {} void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, @@ -45,10 +50,13 @@ public: } 
unsigned getNumFixupKinds() const override { - // FIXME: Implement this when we define fixup kind - return 0; + return LoongArch::NumTargetFixupKinds; } + Optional getFixupKind(StringRef Name) const override; + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override {} @@ -57,6 +65,7 @@ public: std::unique_ptr createObjectTargetWriter() const override; + const MCTargetOptions &getTargetOptions() const { return TargetOptions; } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp index de2ba2833414b227ac38ea7908377e8b20dc9a77..28404f04dbdacd6c6c899bfe875a141d65abf3ad 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp @@ -15,11 +15,71 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace LoongArchABI { +ABI computeTargetABI(const Triple &TT, StringRef ABIName) { + ABI ArgProvidedABI = getTargetABI(ABIName); + bool Is64Bit = TT.isArch64Bit(); + ABI TripleABI; + + // Figure out the ABI explicitly requested via the triple's environment type. + switch (TT.getEnvironment()) { + case llvm::Triple::EnvironmentType::GNUSF: + TripleABI = Is64Bit ? LoongArchABI::ABI_LP64S : LoongArchABI::ABI_ILP32S; + break; + case llvm::Triple::EnvironmentType::GNUF32: + TripleABI = Is64Bit ? LoongArchABI::ABI_LP64F : LoongArchABI::ABI_ILP32F; + break; + + // Let the fallback case behave like {ILP32,LP64}D. + case llvm::Triple::EnvironmentType::GNUF64: + default: + TripleABI = Is64Bit ? 
LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; + break; + } + + switch (ArgProvidedABI) { + case LoongArchABI::ABI_Unknown: + // Fallback to the triple-implied ABI if ABI name is not specified or + // invalid. + if (!ABIName.empty()) + errs() << "'" << ABIName + << "' is not a recognized ABI for this target, ignoring and using " + "triple-implied ABI\n"; + return TripleABI; + + case LoongArchABI::ABI_ILP32S: + case LoongArchABI::ABI_ILP32F: + case LoongArchABI::ABI_ILP32D: + if (Is64Bit) { + errs() << "32-bit ABIs are not supported for 64-bit targets, ignoring " + "target-abi and using triple-implied ABI\n"; + return TripleABI; + } + break; + + case LoongArchABI::ABI_LP64S: + case LoongArchABI::ABI_LP64F: + case LoongArchABI::ABI_LP64D: + if (!Is64Bit) { + errs() << "64-bit ABIs are not supported for 32-bit targets, ignoring " + "target-abi and using triple-implied ABI\n"; + return TripleABI; + } + break; + } + + if (!ABIName.empty() && TT.hasEnvironment() && ArgProvidedABI != TripleABI) + errs() << "warning: triple-implied ABI conflicts with provided target-abi '" + << ABIName << "', using target-abi\n"; + + return ArgProvidedABI; +} + ABI getTargetABI(StringRef ABIName) { auto TargetABI = StringSwitch(ABIName) .Case("ilp32s", ABI_ILP32S) diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h index fee247a0c02c692d0dd278a3ab978a73a3549ff9..cdbd1f5699dfd70dfa6bb6f4bddad130b3b5fd40 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h @@ -22,6 +22,27 @@ namespace llvm { +// This namespace holds all of the target specific flags that instruction info +// tracks. +namespace LoongArchII { +enum { + MO_None, + MO_CALL, + MO_CALL_PLT, + MO_PCREL_HI, + MO_PCREL_LO, + MO_GOT_PC_HI, + MO_GOT_PC_LO, + MO_LE_HI, + MO_LE_LO, + MO_IE_PC_HI, + MO_IE_PC_LO, + MO_LD_PC_HI, + MO_GD_PC_HI, + // TODO: Add more flags. 
+}; +} // end namespace LoongArchII + namespace LoongArchABI { enum ABI { ABI_ILP32S, @@ -33,6 +54,7 @@ enum ABI { ABI_Unknown }; +ABI computeTargetABI(const Triple &TT, StringRef ABIName); ABI getTargetABI(StringRef ABIName); // Returns the register used to hold the stack pointer after realignment. diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index 1850b0d8a756cf0fb497b140e779b960759bdc73..a6b9c0652639fbcc7c95e7ab70ff8990957e2f9d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/LoongArchFixupKinds.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" @@ -22,13 +24,6 @@ public: ~LoongArchELFObjectWriter() override; - // Return true if the given relocation must be with a symbol rather than - // section plus offset. - bool needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const override { - return true; - } - protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -52,9 +47,42 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, return Kind - FirstLiteralRelocationKind; switch (Kind) { - // TODO: Implement this when we defined fixup kind. default: + Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type"); + return ELF::R_LARCH_NONE; + case FK_Data_1: + Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); + return ELF::R_LARCH_NONE; + case FK_Data_2: + Ctx.reportError(Fixup.getLoc(), "2-byte data relocations not supported"); return ELF::R_LARCH_NONE; + case FK_Data_4: + return IsPCRel ? 
ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_Data_8: + return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: + return ELF::R_LARCH_B21; + case LoongArch::fixup_loongarch_b26: + return ELF::R_LARCH_B26; + case LoongArch::fixup_loongarch_abs_hi20: + return ELF::R_LARCH_ABS_HI20; + case LoongArch::fixup_loongarch_abs_lo12: + return ELF::R_LARCH_ABS_LO12; + case LoongArch::fixup_loongarch_abs64_lo20: + return ELF::R_LARCH_ABS64_LO20; + case LoongArch::fixup_loongarch_abs64_hi12: + return ELF::R_LARCH_ABS64_HI12; + case LoongArch::fixup_loongarch_tls_le_hi20: + return ELF::R_LARCH_TLS_LE_HI20; + case LoongArch::fixup_loongarch_tls_le_lo12: + return ELF::R_LARCH_TLS_LE_LO12; + case LoongArch::fixup_loongarch_tls_le64_lo20: + return ELF::R_LARCH_TLS_LE64_LO20; + case LoongArch::fixup_loongarch_tls_le64_hi12: + return ELF::R_LARCH_TLS_LE64_HI12; + // TODO: Handle more fixup-kinds. } } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a6e15e09463d261fa5d612803d483faccb7b7118 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp @@ -0,0 +1,92 @@ +//===-- LoongArchELFStreamer.cpp - LoongArch ELF Target Streamer Methods --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchELFStreamer.h" +#include "LoongArchAsmBackend.h" +#include "LoongArchBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// This part is for ELF object output. +LoongArchTargetELFStreamer::LoongArchTargetELFStreamer( + MCStreamer &S, const MCSubtargetInfo &STI) + : LoongArchTargetStreamer(S) { + auto &MAB = static_cast( + getStreamer().getAssembler().getBackend()); + setTargetABI(LoongArchABI::computeTargetABI( + STI.getTargetTriple(), MAB.getTargetOptions().getABIName())); +} + +MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { + return static_cast(Streamer); +} + +void LoongArchTargetELFStreamer::finish() { + LoongArchTargetStreamer::finish(); + MCAssembler &MCA = getStreamer().getAssembler(); + LoongArchABI::ABI ABI = getTargetABI(); + + // Figure out the e_flags. + // + // Bitness is already represented with the EI_CLASS byte in the current spec, + // so here we only record the base ABI modifier. Also set the object file ABI + // version to v1, as upstream LLVM cannot handle the previous stack-machine- + // based relocs from day one. + // + // Refer to LoongArch ELF psABI v2.01 for details. 
+ unsigned EFlags = MCA.getELFHeaderEFlags(); + EFlags |= ELF::EF_LOONGARCH_OBJABI_V1; + switch (ABI) { + case LoongArchABI::ABI_ILP32S: + case LoongArchABI::ABI_LP64S: + EFlags |= ELF::EF_LOONGARCH_ABI_SOFT_FLOAT; + break; + case LoongArchABI::ABI_ILP32F: + case LoongArchABI::ABI_LP64F: + EFlags |= ELF::EF_LOONGARCH_ABI_SINGLE_FLOAT; + break; + case LoongArchABI::ABI_ILP32D: + case LoongArchABI::ABI_LP64D: + EFlags |= ELF::EF_LOONGARCH_ABI_DOUBLE_FLOAT; + break; + case LoongArchABI::ABI_Unknown: + llvm_unreachable("Improperly initialized target ABI"); + } + MCA.setELFHeaderEFlags(EFlags); +} + +namespace { +class LoongArchELFStreamer : public MCELFStreamer { +public: + LoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE) + : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} +}; +} // end namespace + +namespace llvm { +MCELFStreamer *createLoongArchELFStreamer(MCContext &C, + std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE, + bool RelaxAll) { + LoongArchELFStreamer *S = new LoongArchELFStreamer( + C, std::move(MAB), std::move(MOW), std::move(MCE)); + S->getAssembler().setRelaxAll(RelaxAll); + return S; +} +} // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h new file mode 100644 index 0000000000000000000000000000000000000000..220b54092c72a113f1ecb5918f9ea1cc7795fc08 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h @@ -0,0 +1,31 @@ +//==-- LoongArchELFStreamer.h - LoongArch ELF Target Streamer --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H + +#include "LoongArchTargetStreamer.h" +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + +class LoongArchTargetELFStreamer : public LoongArchTargetStreamer { +public: + MCELFStreamer &getStreamer(); + LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); + + void finish() override; +}; + +MCELFStreamer *createLoongArchELFStreamer(MCContext &C, + std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE, + bool RelaxAll); +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h new file mode 100644 index 0000000000000000000000000000000000000000..ba2d6718cdf9a27ec3bf587a495fbf7009c792c8 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -0,0 +1,114 @@ +//===- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCFixup.h" + +#undef LoongArch + +namespace llvm { +namespace LoongArch { +// +// This table *must* be in the same order of +// MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds] in +// LoongArchAsmBackend.cpp. 
+// +enum Fixups { + // Fixups that can be handled by LoongArchAsmBackend::applyFixup. + // 16-bit fixup corresponding to %b16(foo) for instructions like bne. + fixup_loongarch_b16 = FirstTargetFixupKind, + // 21-bit fixup corresponding to %b21(foo) for instructions like bnez. + fixup_loongarch_b21, + // 26-bit fixup corresponding to %b26(foo)/%plt(foo) for instructions b/bl. + fixup_loongarch_b26, + // 20-bit fixup corresponding to %abs_hi20(foo) for instruction lu12i.w. + fixup_loongarch_abs_hi20, + // 12-bit fixup corresponding to %abs_lo12(foo) for instruction ori. + fixup_loongarch_abs_lo12, + // 20-bit fixup corresponding to %abs64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_abs64_lo20, + // 12-bit fixup corresponding to %abs64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_abs64_hi12, + // 20-bit fixup corresponding to %le_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_le_hi20, + // 12-bit fixup corresponding to %le_lo12(foo) for instruction ori. + fixup_loongarch_tls_le_lo12, + // 20-bit fixup corresponding to %le64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_le64_lo20, + // 12-bit fixup corresponding to %le64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_tls_le64_hi12, + // TODO: Add more fixup kinds. + + // Used as a sentinel, must be the last of the fixup which can be handled by + // LoongArchAsmBackend::applyFixup. + fixup_loongarch_invalid, + NumTargetFixupKinds = fixup_loongarch_invalid - FirstTargetFixupKind, + + // Define fixups for force relocation as FirstLiteralRelocationKind+V + // represents the relocation type with number V. + // 20-bit fixup corresponding to %pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_pcala_hi20 = + FirstLiteralRelocationKind + ELF::R_LARCH_PCALA_HI20, + // 12-bit fixup corresponding to %pc_lo12(foo) for instructions like addi.w/d. + fixup_loongarch_pcala_lo12, + // 20-bit fixup corresponding to %pc64_lo20(foo) for instruction lu32i.d. 
+ fixup_loongarch_pcala64_lo20, + // 12-bit fixup corresponding to %pc64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_pcala64_hi12, + // 20-bit fixup corresponding to %got_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_got_pc_hi20, + // 12-bit fixup corresponding to %got_pc_lo12(foo) for instructions + // ld.w/ld.d/add.d. + fixup_loongarch_got_pc_lo12, + // 20-bit fixup corresponding to %got64_pc_lo20(foo) for instruction lu32i.d. + fixup_loongarch_got64_pc_lo20, + // 12-bit fixup corresponding to %got64_pc_hi12(foo) for instruction lu52i.d. + fixup_loongarch_got64_pc_hi12, + // 20-bit fixup corresponding to %got_hi20(foo) for instruction lu12i.w. + fixup_loongarch_got_hi20, + // 12-bit fixup corresponding to %got_lo12(foo) for instruction ori. + fixup_loongarch_got_lo12, + // 20-bit fixup corresponding to %got64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_got64_lo20, + // 12-bit fixup corresponding to %got64_hi12(foo) for instruction lu52i.d. + fixup_loongarch_got64_hi12, + // Skip R_LARCH_TLS_LE_*. + // 20-bit fixup corresponding to %ie_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_ie_pc_hi20 = + FirstLiteralRelocationKind + ELF::R_LARCH_TLS_IE_PC_HI20, + // 12-bit fixup corresponding to %ie_pc_lo12(foo) for instructions + // ld.w/ld.d/add.d. + fixup_loongarch_tls_ie_pc_lo12, + // 20-bit fixup corresponding to %ie64_pc_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_ie64_pc_lo20, + // 12-bit fixup corresponding to %ie64_pc_hi12(foo) for instruction lu52i.d. + fixup_loongarch_tls_ie64_pc_hi12, + // 20-bit fixup corresponding to %ie_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_ie_hi20, + // 12-bit fixup corresponding to %ie_lo12(foo) for instruction ori. + fixup_loongarch_tls_ie_lo12, + // 20-bit fixup corresponding to %ie64_lo20(foo) for instruction lu32i.d. + fixup_loongarch_tls_ie64_lo20, + // 12-bit fixup corresponding to %ie64_hi12(foo) for instruction lu52i.d. 
+ fixup_loongarch_tls_ie64_hi12, + // 20-bit fixup corresponding to %ld_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_ld_pc_hi20, + // 20-bit fixup corresponding to %ld_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_ld_hi20, + // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_gd_pc_hi20, + // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_gd_hi20 +}; +} // end namespace LoongArch +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp index 66183868f4681c82b1c475612cc6f4b1a87f0f08..27f70c38d12b1a305ef1a60ca2d9c9d47b906735 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp @@ -12,11 +12,13 @@ #include "LoongArchInstPrinter.h" #include "LoongArchBaseInfo.h" +#include "LoongArchMCTargetDesc.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; #define DEBUG_TYPE "loongarch-asm-printer" @@ -25,6 +27,26 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "LoongArchGenAsmWriter.inc" +static cl::opt + NumericReg("loongarch-numeric-reg", + cl::desc("Print numeric register names rather than the ABI " + "names (such as $r0 instead of $zero)"), + cl::init(false), cl::Hidden); + +// The command-line flag above is used by llvm-mc and llc. It can be used by +// `llvm-objdump`, but we override the value here to handle options passed to +// `llvm-objdump` with `-M` (which matches GNU objdump). There did not seem to +// be an easier way to allow these options in all these tools, without doing it +// this way. 
+bool LoongArchInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { + if (Opt == "numeric") { + NumericReg = true; + return true; + } + + return false; +} + void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, @@ -34,8 +56,8 @@ void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { - O << '$' << getRegisterName(RegNo); +void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned Reg) const { + O << '$' << getRegisterName(Reg); } void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -57,7 +79,16 @@ void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, MO.getExpr()->print(O, &MAI); } -const char *LoongArchInstPrinter::getRegisterName(unsigned RegNo) { +void LoongArchInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + assert(MO.isReg() && "printAtomicMemOp can only print register operands"); + printRegName(O, MO.getReg()); +} + +const char *LoongArchInstPrinter::getRegisterName(unsigned Reg) { // Default print reg alias name - return getRegisterName(RegNo, LoongArch::RegAliasName); + return getRegisterName(Reg, NumericReg ? 
LoongArch::NoRegAltName + : LoongArch::RegAliasName); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h index 0cbb3d73cd03b8ce446018f2eef8659edb7bccbb..834019ad9316b496e93bd948cd1a40e31da82021 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h @@ -24,9 +24,13 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} + bool applyTargetSpecificCLOption(StringRef Opt) override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, unsigned RegNo) const override; + void printRegName(raw_ostream &O, unsigned Reg) const override; + void printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; @@ -37,8 +41,8 @@ public: void printCustomAliasOperand(const MCInst *MI, uint64_t Address, unsigned OpIdx, unsigned PrintMethodIdx, const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); + static const char *getRegisterName(unsigned Reg); + static const char *getRegisterName(unsigned Reg, unsigned AltIdx); private: void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 01a370a90403c96c2729e73f605b0d00beec6fb2..4587d59087f3220f6a42f4b8deb5959f19c0206b 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -10,13 +10,16 @@ // 
//===----------------------------------------------------------------------===// +#include "LoongArchFixupKinds.h" #include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCExpr.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" using namespace llvm; @@ -68,6 +71,10 @@ public: unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + + unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; }; } // end namespace @@ -82,7 +89,9 @@ LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (MO.isImm()) return static_cast(MO.getImm()); - llvm_unreachable("Unhandled expression!"); + // MO must be an Expr. 
+ assert(MO.isExpr()); + return getExprOpValue(MI, MO, Fixups, STI); } unsigned @@ -96,9 +105,172 @@ unsigned LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned Res = MI.getOperand(OpNo).getImm(); - assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); - return Res >> 2; + const MCOperand &MO = MI.getOperand(OpNo); + + if (MO.isImm()) { + unsigned Res = MI.getOperand(OpNo).getImm(); + assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); + return Res >> 2; + } + + return getExprOpValue(MI, MO, Fixups, STI); +} + +unsigned +LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MO.isExpr() && "getExprOpValue expects only expressions"); + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); + LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid; + if (Kind == MCExpr::Target) { + const LoongArchMCExpr *LAExpr = cast(Expr); + + switch (LAExpr->getKind()) { + case LoongArchMCExpr::VK_LoongArch_None: + case LoongArchMCExpr::VK_LoongArch_Invalid: + llvm_unreachable("Unhandled fixup kind!"); + case LoongArchMCExpr::VK_LoongArch_B16: + FixupKind = LoongArch::fixup_loongarch_b16; + break; + case LoongArchMCExpr::VK_LoongArch_B21: + FixupKind = LoongArch::fixup_loongarch_b21; + break; + case LoongArchMCExpr::VK_LoongArch_B26: + case LoongArchMCExpr::VK_LoongArch_CALL: + case LoongArchMCExpr::VK_LoongArch_CALL_PLT: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + case LoongArchMCExpr::VK_LoongArch_ABS_HI20: + FixupKind = LoongArch::fixup_loongarch_abs_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_ABS_LO12: + FixupKind = LoongArch::fixup_loongarch_abs_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_ABS64_LO20: + FixupKind = LoongArch::fixup_loongarch_abs64_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_ABS64_HI12: + FixupKind 
= LoongArch::fixup_loongarch_abs64_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_PCALA_HI20: + FixupKind = LoongArch::fixup_loongarch_pcala_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_PCALA_LO12: + FixupKind = LoongArch::fixup_loongarch_pcala_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_PCALA64_LO20: + FixupKind = LoongArch::fixup_loongarch_pcala64_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_PCALA64_HI12: + FixupKind = LoongArch::fixup_loongarch_pcala64_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20: + FixupKind = LoongArch::fixup_loongarch_got_pc_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12: + FixupKind = LoongArch::fixup_loongarch_got_pc_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20: + FixupKind = LoongArch::fixup_loongarch_got64_pc_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12: + FixupKind = LoongArch::fixup_loongarch_got64_pc_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_GOT_HI20: + FixupKind = LoongArch::fixup_loongarch_got_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_GOT_LO12: + FixupKind = LoongArch::fixup_loongarch_got_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_GOT64_LO20: + FixupKind = LoongArch::fixup_loongarch_got64_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_GOT64_HI12: + FixupKind = LoongArch::fixup_loongarch_got64_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_le_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12: + FixupKind = LoongArch::fixup_loongarch_tls_le_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20: + FixupKind = LoongArch::fixup_loongarch_tls_le64_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12: + FixupKind = LoongArch::fixup_loongarch_tls_le64_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_hi20; + break; + case 
LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12: + FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_pc_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_pc_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_ie_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE_LO12: + FixupKind = LoongArch::fixup_loongarch_tls_ie_lo12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE64_LO20: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_lo20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_IE64_HI12: + FixupKind = LoongArch::fixup_loongarch_tls_ie64_hi12; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_ld_pc_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_LD_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_ld_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_gd_pc_hi20; + break; + case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: + FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; + break; + } + } else if (Kind == MCExpr::SymbolRef && + cast(Expr)->getKind() == + MCSymbolRefExpr::VK_None) { + switch (MI.getOpcode()) { + default: + break; + case LoongArch::BEQ: + case LoongArch::BNE: + case LoongArch::BLT: + case LoongArch::BGE: + case LoongArch::BLTU: + case LoongArch::BGEU: + FixupKind = LoongArch::fixup_loongarch_b16; + break; + case LoongArch::BEQZ: + case LoongArch::BNEZ: + case LoongArch::BCEQZ: + case LoongArch::BCNEZ: + FixupKind = LoongArch::fixup_loongarch_b21; + break; + case LoongArch::B: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + } + } + + assert(FixupKind != LoongArch::fixup_loongarch_invalid && + "Unhandled expression!"); + + Fixups.push_back( + MCFixup::create(0, 
Expr, MCFixupKind(FixupKind), MI.getLoc())); + return 0; } void LoongArchMCCodeEmitter::encodeInstruction( diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..993111552a31430cd3111efae969680e6bb7e4aa --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -0,0 +1,225 @@ +//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the LoongArch architecture. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchMCExpr.h" +#include "LoongArchAsmBackend.h" +#include "LoongArchFixupKinds.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-mcexpr" + +const LoongArchMCExpr * +LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { + return new (Ctx) LoongArchMCExpr(Expr, Kind); +} + +void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + VariantKind Kind = getKind(); + bool HasVariant = + ((Kind != VK_LoongArch_None) && (Kind != VK_LoongArch_CALL)); + + if (HasVariant) + OS << '%' << getVariantKindName(getKind()) << '('; + Expr->print(OS, MAI); + if (HasVariant) + OS << ')'; +} + +bool LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const 
MCFixup *Fixup) const { + // Explicitly drop the layout and assembler to prevent any symbolic folding in + // the expression handling. This is required to preserve symbolic difference + // expressions to emit the paired relocations. + if (!getSubExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) + return false; + + Res = + MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); + // Custom fixup types are not valid with symbol difference expressions. + return Res.getSymB() ? getKind() == VK_LoongArch_None : true; +} + +void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} + +StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { + switch (Kind) { + default: + llvm_unreachable("Invalid ELF symbol kind"); + case VK_LoongArch_CALL_PLT: + return "plt"; + case VK_LoongArch_B16: + return "b16"; + case VK_LoongArch_B21: + return "b21"; + case VK_LoongArch_B26: + return "b26"; + case VK_LoongArch_ABS_HI20: + return "abs_hi20"; + case VK_LoongArch_ABS_LO12: + return "abs_lo12"; + case VK_LoongArch_ABS64_LO20: + return "abs64_lo20"; + case VK_LoongArch_ABS64_HI12: + return "abs64_hi12"; + case VK_LoongArch_PCALA_HI20: + return "pc_hi20"; + case VK_LoongArch_PCALA_LO12: + return "pc_lo12"; + case VK_LoongArch_PCALA64_LO20: + return "pc64_lo20"; + case VK_LoongArch_PCALA64_HI12: + return "pc64_hi12"; + case VK_LoongArch_GOT_PC_HI20: + return "got_pc_hi20"; + case VK_LoongArch_GOT_PC_LO12: + return "got_pc_lo12"; + case VK_LoongArch_GOT64_PC_LO20: + return "got64_pc_lo20"; + case VK_LoongArch_GOT64_PC_HI12: + return "got64_pc_hi12"; + case VK_LoongArch_GOT_HI20: + return "got_hi20"; + case VK_LoongArch_GOT_LO12: + return "got_lo12"; + case VK_LoongArch_GOT64_LO20: + return "got64_lo20"; + case VK_LoongArch_GOT64_HI12: + return "got64_hi12"; + case VK_LoongArch_TLS_LE_HI20: + return "le_hi20"; + case VK_LoongArch_TLS_LE_LO12: + return "le_lo12"; + case VK_LoongArch_TLS_LE64_LO20: + return 
"le64_lo20"; + case VK_LoongArch_TLS_LE64_HI12: + return "le64_hi12"; + case VK_LoongArch_TLS_IE_PC_HI20: + return "ie_pc_hi20"; + case VK_LoongArch_TLS_IE_PC_LO12: + return "ie_pc_lo12"; + case VK_LoongArch_TLS_IE64_PC_LO20: + return "ie64_pc_lo20"; + case VK_LoongArch_TLS_IE64_PC_HI12: + return "ie64_pc_hi12"; + case VK_LoongArch_TLS_IE_HI20: + return "ie_hi20"; + case VK_LoongArch_TLS_IE_LO12: + return "ie_lo12"; + case VK_LoongArch_TLS_IE64_LO20: + return "ie64_lo20"; + case VK_LoongArch_TLS_IE64_HI12: + return "ie64_hi12"; + case VK_LoongArch_TLS_LD_PC_HI20: + return "ld_pc_hi20"; + case VK_LoongArch_TLS_LD_HI20: + return "ld_hi20"; + case VK_LoongArch_TLS_GD_PC_HI20: + return "gd_pc_hi20"; + case VK_LoongArch_TLS_GD_HI20: + return "gd_hi20"; + } +} + +LoongArchMCExpr::VariantKind +LoongArchMCExpr::getVariantKindForName(StringRef name) { + return StringSwitch(name) + .Case("plt", VK_LoongArch_CALL_PLT) + .Case("b16", VK_LoongArch_B16) + .Case("b21", VK_LoongArch_B21) + .Case("b26", VK_LoongArch_B26) + .Case("abs_hi20", VK_LoongArch_ABS_HI20) + .Case("abs_lo12", VK_LoongArch_ABS_LO12) + .Case("abs64_lo20", VK_LoongArch_ABS64_LO20) + .Case("abs64_hi12", VK_LoongArch_ABS64_HI12) + .Case("pc_hi20", VK_LoongArch_PCALA_HI20) + .Case("pc_lo12", VK_LoongArch_PCALA_LO12) + .Case("pc64_lo20", VK_LoongArch_PCALA64_LO20) + .Case("pc64_hi12", VK_LoongArch_PCALA64_HI12) + .Case("got_pc_hi20", VK_LoongArch_GOT_PC_HI20) + .Case("got_pc_lo12", VK_LoongArch_GOT_PC_LO12) + .Case("got64_pc_lo20", VK_LoongArch_GOT64_PC_LO20) + .Case("got64_pc_hi12", VK_LoongArch_GOT64_PC_HI12) + .Case("got_hi20", VK_LoongArch_GOT_HI20) + .Case("got_lo12", VK_LoongArch_GOT_LO12) + .Case("got64_lo20", VK_LoongArch_GOT64_LO20) + .Case("got64_hi12", VK_LoongArch_GOT64_HI12) + .Case("le_hi20", VK_LoongArch_TLS_LE_HI20) + .Case("le_lo12", VK_LoongArch_TLS_LE_LO12) + .Case("le64_lo20", VK_LoongArch_TLS_LE64_LO20) + .Case("le64_hi12", VK_LoongArch_TLS_LE64_HI12) + .Case("ie_pc_hi20", 
VK_LoongArch_TLS_IE_PC_HI20) + .Case("ie_pc_lo12", VK_LoongArch_TLS_IE_PC_LO12) + .Case("ie64_pc_lo20", VK_LoongArch_TLS_IE64_PC_LO20) + .Case("ie64_pc_hi12", VK_LoongArch_TLS_IE64_PC_HI12) + .Case("ie_hi20", VK_LoongArch_TLS_IE_HI20) + .Case("ie_lo12", VK_LoongArch_TLS_IE_LO12) + .Case("ie64_lo20", VK_LoongArch_TLS_IE64_LO20) + .Case("ie64_hi12", VK_LoongArch_TLS_IE64_HI12) + .Case("ld_pc_hi20", VK_LoongArch_TLS_LD_PC_HI20) + .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) + .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) + .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) + .Default(VK_LoongArch_Invalid); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expression"); + break; + case MCExpr::Constant: + break; + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. 
+ const MCSymbolRefExpr &SymRef = *cast(Expr); + cast(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + } +} + +void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_LoongArch_TLS_LE_HI20: + case VK_LoongArch_TLS_IE_PC_HI20: + case VK_LoongArch_TLS_IE_HI20: + case VK_LoongArch_TLS_LD_PC_HI20: + case VK_LoongArch_TLS_LD_HI20: + case VK_LoongArch_TLS_GD_PC_HI20: + case VK_LoongArch_TLS_GD_HI20: + break; + } + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h new file mode 100644 index 0000000000000000000000000000000000000000..0945cf82db865cdaa3c2edc412d0eb3c96e8ba39 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -0,0 +1,101 @@ +//= LoongArchMCExpr.h - LoongArch specific MC expression classes -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes LoongArch-specific MCExprs, used for modifiers like +// "%pc_hi20" or "%pc_lo12" etc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class StringRef; + +class LoongArchMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_LoongArch_None, + VK_LoongArch_CALL, + VK_LoongArch_CALL_PLT, + VK_LoongArch_B16, + VK_LoongArch_B21, + VK_LoongArch_B26, + VK_LoongArch_ABS_HI20, + VK_LoongArch_ABS_LO12, + VK_LoongArch_ABS64_LO20, + VK_LoongArch_ABS64_HI12, + VK_LoongArch_PCALA_HI20, + VK_LoongArch_PCALA_LO12, + VK_LoongArch_PCALA64_LO20, + VK_LoongArch_PCALA64_HI12, + VK_LoongArch_GOT_PC_HI20, + VK_LoongArch_GOT_PC_LO12, + VK_LoongArch_GOT64_PC_LO20, + VK_LoongArch_GOT64_PC_HI12, + VK_LoongArch_GOT_HI20, + VK_LoongArch_GOT_LO12, + VK_LoongArch_GOT64_LO20, + VK_LoongArch_GOT64_HI12, + VK_LoongArch_TLS_LE_HI20, + VK_LoongArch_TLS_LE_LO12, + VK_LoongArch_TLS_LE64_LO20, + VK_LoongArch_TLS_LE64_HI12, + VK_LoongArch_TLS_IE_PC_HI20, + VK_LoongArch_TLS_IE_PC_LO12, + VK_LoongArch_TLS_IE64_PC_LO20, + VK_LoongArch_TLS_IE64_PC_HI12, + VK_LoongArch_TLS_IE_HI20, + VK_LoongArch_TLS_IE_LO12, + VK_LoongArch_TLS_IE64_LO20, + VK_LoongArch_TLS_IE64_HI12, + VK_LoongArch_TLS_LD_PC_HI20, + VK_LoongArch_TLS_LD_HI20, + VK_LoongArch_TLS_GD_PC_HI20, + VK_LoongArch_TLS_GD_HI20, + VK_LoongArch_Invalid // Must be the last item. 
+ }; + +private: + const MCExpr *Expr; + const VariantKind Kind; + + explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind) + : Expr(Expr), Kind(Kind) {} + +public: + static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind, + MCContext &Ctx); + + VariantKind getKind() const { return Kind; } + const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static StringRef getVariantKindName(VariantKind Kind); + static VariantKind getVariantKindForName(StringRef name); +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index 8d71235f6a811cd345dd3345337f89921005f91a..942e667bc2618728e2ddcdcc22ae47d0fed315d9 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -12,13 +12,17 @@ #include "LoongArchMCTargetDesc.h" #include "LoongArchBaseInfo.h" +#include "LoongArchELFStreamer.h" #include "LoongArchInstPrinter.h" #include "LoongArchMCAsmInfo.h" #include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include 
"llvm/MC/TargetRegistry.h" @@ -50,7 +54,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { static MCSubtargetInfo * createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - if (CPU.empty()) + if (CPU.empty() || CPU == "generic") CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } @@ -76,6 +80,13 @@ static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, return new LoongArchInstPrinter(MAI, MII, MRI); } +static MCTargetStreamer * +createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return STI.getTargetTriple().isOSBinFormatELF() + ? new LoongArchTargetELFStreamer(S, STI) + : nullptr; +} + namespace { class LoongArchMCInstrAnalysis : public MCInstrAnalysis { @@ -101,6 +112,17 @@ static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) { return new LoongArchMCInstrAnalysis(Info); } +namespace { +MCStreamer *createLoongArchELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&MOW, + std::unique_ptr &&MCE, + bool RelaxAll) { + return createLoongArchELFStreamer(Context, std::move(MAB), std::move(MOW), + std::move(MCE), RelaxAll); +} +} // end namespace + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); @@ -111,5 +133,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis); + TargetRegistry::RegisterELFStreamer(*T, createLoongArchELFStreamer); + TargetRegistry::RegisterObjectTargetStreamer( + *T, createLoongArchObjectTargetStreamer); } } diff --git 
a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..571178442055190b2e6c88ceaa2fdce5bc09af8a --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp @@ -0,0 +1,24 @@ +//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchTargetStreamer.h" + +using namespace llvm; + +LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) {} + +void LoongArchTargetStreamer::setTargetABI(LoongArchABI::ABI ABI) { + assert(ABI != LoongArchABI::ABI_Unknown && + "Improperly initialized target ABI"); + TargetABI = ABI; +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h new file mode 100644 index 0000000000000000000000000000000000000000..d4b1b2a3e358dbd692abbb6fdf4a4dbae9b4c65f --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h @@ -0,0 +1,27 @@ +//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHTARGETSTREAMER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHTARGETSTREAMER_H + +#include "LoongArch.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +class LoongArchTargetStreamer : public MCTargetStreamer { + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; + +public: + LoongArchTargetStreamer(MCStreamer &S); + void setTargetABI(LoongArchABI::ABI ABI); + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp index 10654510032f06a9d019ea37c0ad04d8e572bec7..1d6be4069b71e4527a835faea653b75749de9c5b 100644 --- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp @@ -24,7 +24,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() { RegisterTarget X( getTheLoongArch32Target(), "loongarch32", "32-bit LoongArch", "LoongArch"); - RegisterTarget Y( + RegisterTarget Y( getTheLoongArch64Target(), "loongarch64", "64-bit LoongArch", "LoongArch"); } diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..772d24c5ce3deb95f539e0231f561c7a2322f119 --- /dev/null +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -0,0 +1,60 @@ +//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise LoongArch hardware features +// such as CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TargetParser/LoongArchTargetParser.h" + +using namespace llvm; +using namespace llvm::LoongArch; + +const FeatureInfo AllFeatures[] = { +#define LOONGARCH_FEATURE(NAME, KIND) {NAME, KIND}, +#include "llvm/TargetParser/LoongArchTargetParser.def" +}; + +const ArchInfo AllArchs[] = { +#define LOONGARCH_ARCH(NAME, KIND, FEATURES) \ + {NAME, LoongArch::ArchKind::KIND, FEATURES}, +#include "llvm/TargetParser/LoongArchTargetParser.def" +}; + +bool LoongArch::isValidArchName(StringRef Arch) { + for (const auto A : AllArchs) + if (A.Name == Arch) + return true; + return false; +} + +bool LoongArch::getArchFeatures(StringRef Arch, + std::vector &Features) { + for (const auto A : AllArchs) { + if (A.Name == Arch) { + for (const auto F : AllFeatures) + if ((A.Features & F.Kind) == F.Kind) + Features.push_back(F.Name); + return true; + } + } + return false; +} + +bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } + +void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { + for (const auto A : AllArchs) + Values.emplace_back(A.Name); +} + +StringRef LoongArch::getDefaultArch(bool Is64Bit) { + // TODO: use a real 32-bit arch name. + return Is64Bit ? 
"loongarch64" : ""; +} diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3274e36ab71af4fea3a49f58b4cc8804902647cd..5a58160d5f3ffea55b8c7d9c306d69ee41e97760 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -105,6 +105,7 @@ static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; +static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 46; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; @@ -483,6 +484,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsMIPS64 = TargetTriple.isMIPS64(); bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsLoongArch64 = TargetTriple.isLoongArch64(); bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; bool IsWindows = TargetTriple.isOSWindows(); bool IsFuchsia = TargetTriple.isOSFuchsia(); @@ -554,6 +556,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; + else if (IsLoongArch64) + Mapping.Offset = kLoongArch64_ShadowOffset64; else if (IsRISCV64) Mapping.Offset = kRISCV64_ShadowOffset64; else if (IsAMDGPU) @@ -572,12 +576,12 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, } // OR-ing shadow offset if more efficient (at least on x86) if the offset - // is a power of two, but on ppc64 we have to use add since the shadow - // offset is not necessary 1/8-th of the 
address space. On SystemZ, - // we could OR the constant in a single instruction, but it's more + // is a power of two, but on ppc64 and loongarch64 we have to use add since + // the shadow offset is not necessarily 1/8-th of the address space. On + // SystemZ, we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS && - !IsRISCV64 && + !IsRISCV64 && !IsLoongArch64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; bool IsAndroidWithIfuncSupport = diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index 8aedbad871951c5f673fd33cacd9f883aae754e6..9924e0c3e59242b4abe71e035a383459abfb633a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -204,7 +204,7 @@ define i32 @test_memory_constraint(i32* %a) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 655370 /* regdef:GPR32common */, def %1, 196622 /* mem:m */, [[COPY]](p0) + ; CHECK: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 655370 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 273498491cd3328c0d458f6c7fa14a0210c85a34..a1119658f28e0f664f9df77669811c4f7742bad5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -203,7 +203,7 @@ define i32 @test_memory_constraint(i32 
addrspace(3)* %a) nounwind { ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[COPY]](p3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 diff --git a/llvm/test/CodeGen/LoongArch/1ri.mir b/llvm/test/CodeGen/LoongArch/1ri.mir deleted file mode 100644 index c7d74b98775676333115dc3b2c997ae6f4f3bea6..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/1ri.mir +++ /dev/null @@ -1,96 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 1RI20 -# ------------------------------------------------------------------------------------------------- -# ---------------------+-----------------------------------------------------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------+-----------------------------------------------------------+--------------- -# opcode | imm20 | rd -# ---------------------+-----------------------------------------------------------+--------------- - ---- -# CHECK-LABEL: test_LU12I_W: -# CHECK-ENC: 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 -# CHECK-ASM: lu12i.w $a0, 49 -name: test_LU12I_W -body: | - bb.0: - $r4 = LU12I_W 49 -... 
---- -# CHECK-LABEL: test_LU32I_D: -# CHECK-ENC: 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 -# CHECK-ASM: lu32i.d $a0, 196 -name: test_LU32I_D -body: | - bb.0: - $r4 = LU32I_D $r4, 196 -... ---- -# CHECK-LABEL: test_PCADDI: -# CHECK-ENC: 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0 -# CHECK-ASM: pcaddi $a0, 187 -name: test_PCADDI -body: | - bb.0: - $r4 = PCADDI 187 -... ---- -# CHECK-LABEL: test_PCALAU12I: -# CHECK-ENC: 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 1 0 0 -# CHECK-ASM: pcalau12i $a0, 89 -name: test_PCALAU12I -body: | - bb.0: - $r4 = PCALAU12I 89 -... ---- -# CHECK-LABEL: test_PCADDU12I: -# CHECK-ENC: 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: pcaddu12i $a0, 37 -name: test_PCADDU12I -body: | - bb.0: - $r4 = PCADDU12I 37 -... ---- -# CHECK-LABEL: test_PCADDU18I: -# CHECK-ENC: 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 -# CHECK-ASM: pcaddu18i $a0, 26 -name: test_PCADDU18I -body: | - bb.0: - $r4 = PCADDU18I 26 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 1RI21 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+--------------+--------------- -# opcode | imm21{15-0} | rj | imm21{20-16} -# ------------------+-----------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BEQZ: -# CHECK-ENC: 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: beqz $a0, 92 -name: test_BEQZ -body: | - bb.0: - BEQZ $r4, 92 -... 
---- -# CHECK-LABEL: test_BNEZ: -# CHECK-ENC: 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: bnez $a0, 84 -name: test_BNEZ -body: | - bb.0: - BNEZ $r4, 84 diff --git a/llvm/test/CodeGen/LoongArch/2r.mir b/llvm/test/CodeGen/LoongArch/2r.mir deleted file mode 100644 index 488944526e58cd15a34d3cb3bd70ed50ed96fac5..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/2r.mir +++ /dev/null @@ -1,230 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2R -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------------------------------+--------------+--------------- -# opcode | rj | rd -# ------------------------------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_CLO_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clo.w $a0, $a1 -name: test_CLO_W -body: | - bb.0: - $r4 = CLO_W $r5 -... ---- -# CHECK-LABEL: test_CLZ_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clz.w $a0, $a1 -name: test_CLZ_W -body: | - bb.0: - $r4 = CLZ_W $r5 -... 
---- -# CHECK-LABEL: test_CTO_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cto.w $a0, $a1 -name: test_CTO_W -body: | - bb.0: - $r4 = CTO_W $r5 -... ---- -# CHECK-LABEL: test_CTZ_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ctz.w $a0, $a1 -name: test_CTZ_W -body: | - bb.0: - $r4 = CTZ_W $r5 -... ---- -# CHECK-LABEL: test_CLO_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clo.d $a0, $a1 -name: test_CLO_D -body: | - bb.0: - $r4 = CLO_D $r5 -... ---- -# CHECK-LABEL: test_CLZ_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clz.d $a0, $a1 -name: test_CLZ_D -body: | - bb.0: - $r4 = CLZ_D $r5 -... ---- -# CHECK-LABEL: test_CTO_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cto.d $a0, $a1 -name: test_CTO_D -body: | - bb.0: - $r4 = CTO_D $r5 -... ---- -# CHECK-LABEL: test_CTZ_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ctz.d $a0, $a1 -name: test_CTZ_D -body: | - bb.0: - $r4 = CTZ_D $r5 -... ---- -# CHECK-LABEL: test_REVB_2H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.2h $a0, $a1 -name: test_REVB_2H -body: | - bb.0: - $r4 = REVB_2H $r5 -... ---- -# CHECK-LABEL: test_REVB_4H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.4h $a0, $a1 -name: test_REVB_4H -body: | - bb.0: - $r4 = REVB_4H $r5 -... ---- -# CHECK-LABEL: test_REVB_2W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.2w $a0, $a1 -name: test_REVB_2W -body: | - bb.0: - $r4 = REVB_2W $r5 -... 
---- -# CHECK-LABEL: test_REVB_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.d $a0, $a1 -name: test_REVB_D -body: | - bb.0: - $r4 = REVB_D $r5 -... ---- -# CHECK-LABEL: test_REVH_2W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revh.2w $a0, $a1 -name: test_REVH_2W -body: | - bb.0: - $r4 = REVH_2W $r5 -... ---- -# CHECK-LABEL: test_REVH_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revh.d $a0, $a1 -name: test_REVH_D -body: | - bb.0: - $r4 = REVH_D $r5 -... ---- -# CHECK-LABEL: test_BITREV_4B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.4b $a0, $a1 -name: test_BITREV_4B -body: | - bb.0: - $r4 = BITREV_4B $r5 -... ---- -# CHECK-LABEL: test_BITREV_8B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.8b $a0, $a1 -name: test_BITREV_8B -body: | - bb.0: - $r4 = BITREV_8B $r5 -... ---- -# CHECK-LABEL: test_BITREV_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.w $a0, $a1 -name: test_BITREV_W -body: | - bb.0: - $r4 = BITREV_W $r5 -... ---- -# CHECK-LABEL: test_BITREV_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.d $a0, $a1 -name: test_BITREV_D -body: | - bb.0: - $r4 = BITREV_D $r5 -... ---- -# CHECK-LABEL: test_EXT_W_H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ext.w.h $a0, $a1 -name: test_EXT_W_H -body: | - bb.0: - $r4 = EXT_W_H $r5 -... ---- -# CHECK-LABEL: test_EXT_W_B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ext.w.b $a0, $a1 -name: test_EXT_W_B -body: | - bb.0: - $r4 = EXT_W_B $r5 -... 
---- -# CHECK-LABEL: test_CPUCFG: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cpucfg $a0, $a1 -name: test_CPUCFG -body: | - bb.0: - $r4 = CPUCFG $r5 -... ---- -# CHECK-LABEL: test_RDTIMEL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtimel.w $a0, $a1 -name: test_RDTIMEL_W -body: | - bb.0: - $r4, $r5 = RDTIMEL_W -... ---- -# CHECK-LABEL: test_RDTIMEH_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtimeh.w $a0, $a1 -name: test_RDTIMEH_W -body: | - bb.0: - $r4, $r5 = RDTIMEH_W -... ---- -# CHECK-LABEL: test_RDTIME_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtime.d $a0, $a1 -name: test_RDTIME_D -body: | - bb.0: - $r4, $r5 = RDTIME_D diff --git a/llvm/test/CodeGen/LoongArch/2ri.mir b/llvm/test/CodeGen/LoongArch/2ri.mir deleted file mode 100644 index 263fed42cadaa2083f763686552bd9ff9e78ff3e..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/2ri.mir +++ /dev/null @@ -1,432 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI5 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | imm5 | rj | rd -# 
---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLLI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slli.w $a0, $a1, 0 -name: test_SLLI_W -body: | - bb.0: - $r4 = SLLI_W $r5, 0 -... ---- -# CHECK-LABEL: test_SRLI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srli.w $a0, $a1, 30 -name: test_SRLI_W -body: | - bb.0: - $r4 = SRLI_W $r5, 30 -... ---- -# CHECK-LABEL: test_SRAI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srai.w $a0, $a1, 24 -name: test_SRAI_W -body: | - bb.0: - $r4 = SRAI_W $r5, 24 -... ---- -# CHECK-LABEL: test_ROTRI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotri.w $a0, $a1, 23 -name: test_ROTRI_W -body: | - bb.0: - $r4 = ROTRI_W $r5, 23 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI6 -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------------+-----------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------------+-----------------+--------------+--------------- -# opcode | imm6 | rj | rd -# ------------------------------------------------+-----------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLLI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slli.d $a0, $a1, 39 -name: test_SLLI_D -body: | - bb.0: - $r4 = SLLI_D $r5, 39 -... 
---- -# CHECK-LABEL: test_SRLI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srli.d $a0, $a1, 38 -name: test_SRLI_D -body: | - bb.0: - $r4 = SRLI_D $r5, 38 -... ---- -# CHECK-LABEL: test_SRAI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srai.d $a0, $a1, 27 -name: test_SRAI_D -body: | - bb.0: - $r4 = SRAI_D $r5, 27 -... ---- -# CHECK-LABEL: test_ROTRI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotri.d $a0, $a1, 7 -name: test_ROTRI_D -body: | - bb.0: - $r4 = ROTRI_D $r5, 7 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI12 -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------------------------+--------------+--------------- -# opcode | imm12 | rj | rd -# ------------------------------+-----------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLTI: -# CHECK-ENC: 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slti $a0, $a1, 235 -name: test_SLTI -body: | - bb.0: - $r4 = SLTI $r5, 235 -... ---- -# CHECK-LABEL: test_SLTUI: -# CHECK-ENC: 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sltui $a0, $a1, 162 -name: test_SLTUI -body: | - bb.0: - $r4 = SLTUI $r5, 162 -... ---- -# CHECK-LABEL: test_ADDI_W: -# CHECK-ENC: 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addi.w $a0, $a1, 246 -name: test_ADDI_W -body: | - bb.0: - $r4 = ADDI_W $r5, 246 -... 
---- -# CHECK-LABEL: test_ADDI_D: -# CHECK-ENC: 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addi.d $a0, $a1, 75 -name: test_ADDI_D -body: | - bb.0: - $r4 = ADDI_D $r5, 75 -... ---- -# CHECK-LABEL: test_LU52I_D: -# CHECK-ENC: 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: lu52i.d $a0, $a1, 195 -name: test_LU52I_D -body: | - bb.0: - $r4 = LU52I_D $r5, 195 -... ---- -# CHECK-LABEL: test_ANDI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: andi $a0, $a1, 106 -name: test_ANDI -body: | - bb.0: - $r4 = ANDI $r5, 106 -... ---- -# CHECK-LABEL: test_ORI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ori $a0, $a1, 47 -name: test_ORI -body: | - bb.0: - $r4 = ORI $r5, 47 -... ---- -# CHECK-LABEL: test_XORI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: xori $a0, $a1, 99 -name: test_XORI -body: | - bb.0: - $r4 = XORI $r5, 99 -... ---- -# CHECK-LABEL: test_LD_B: -# CHECK-ENC: 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.b $a0, $a1, 21 -name: test_LD_B -body: | - bb.0: - $r4 = LD_B $r5, 21 -... ---- -# CHECK-LABEL: test_LD_H: -# CHECK-ENC: 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.h $a0, $a1, 80 -name: test_LD_H -body: | - bb.0: - $r4 = LD_H $r5, 80 -... ---- -# CHECK-LABEL: test_LD_W: -# CHECK-ENC: 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.w $a0, $a1, 92 -name: test_LD_W -body: | - bb.0: - $r4 = LD_W $r5, 92 -... ---- -# CHECK-LABEL: test_LD_BU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.bu $a0, $a1, 150 -name: test_LD_BU -body: | - bb.0: - $r4 = LD_BU $r5, 150 -... 
---- -# CHECK-LABEL: test_LD_HU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.hu $a0, $a1, 198 -name: test_LD_HU -body: | - bb.0: - $r4 = LD_HU $r5, 198 -... ---- -# CHECK-LABEL: test_LD_WU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.wu $a0, $a1, 31 -name: test_LD_WU -body: | - bb.0: - $r4 = LD_WU $r5, 31 -... ---- -# CHECK-LABEL: test_ST_B: -# CHECK-ENC: 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.b $a0, $a1, 95 -name: test_ST_B -body: | - bb.0: - ST_B $r4, $r5, 95 -... ---- -# CHECK-LABEL: test_ST_H: -# CHECK-ENC: 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.h $a0, $a1, 122 -name: test_ST_H -body: | - bb.0: - ST_H $r4, $r5, 122 -... ---- -# CHECK-LABEL: test_ST_W: -# CHECK-ENC: 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.w $a0, $a1, 175 -name: test_ST_W -body: | - bb.0: - ST_W $r4, $r5, 175 -... ---- -# CHECK-LABEL: test_ST_D: -# CHECK-ENC: 0 0 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.d $a0, $a1, 60 -name: test_ST_D -body: | - bb.0: - ST_D $r4, $r5, 60 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI14 -# ------------------------------------------------------------------------------------------------- -# ------------------------+-----------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------+-----------------------------------------+--------------+--------------- -# opcode | imm14 | rj | rd -# ------------------------+-----------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_LDPTR_W: -# CHECK-ENC: 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.w $a0, $a1, 264 -name: test_LDPTR_W -body: | - bb.0: - $r4 = LDPTR_W $r5, 264 -... ---- -# CHECK-LABEL: test_LDPTR_D: -# CHECK-ENC: 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.d $a0, $a1, 224 -name: test_LDPTR_D -body: | - bb.0: - $r4 = LDPTR_D $r5, 224 -... ---- -# CHECK-LABEL: test_STPTR_W: -# CHECK-ENC: 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.w $a0, $a1, 348 -name: test_STPTR_W -body: | - bb.0: - STPTR_W $r4, $r5, 348 -... ---- -# CHECK-LABEL: test_STPTR_D: -# CHECK-ENC: 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.d $a0, $a1, 580 -name: test_STPTR_D -body: | - bb.0: - STPTR_D $r4, $r5, 580 -... ---- -# CHECK-LABEL: test_LL_W: -# CHECK-ENC: 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.w $a0, $a1, 972 -name: test_LL_W -body: | - bb.0: - $r4 = LL_W $r5, 972 -... ---- -# CHECK-LABEL: test_LL_D: -# CHECK-ENC: 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.d $a0, $a1, 296 -name: test_LL_D -body: | - bb.0: - $r4 = LL_D $r5, 296 -... 
---- -# CHECK-LABEL: test_SC_W: -# CHECK-ENC: 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.w $a0, $a1, 384 -name: test_SC_W -body: | - bb.0: - $r4 = SC_W $r4, $r5, 384 -... ---- -# CHECK-LABEL: test_SC_D: -# CHECK-ENC: 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.d $a0, $a1, 420 -name: test_SC_D -body: | - bb.0: - $r4 = SC_D $r4, $r5, 420 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI16 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+--------------+--------------- -# opcode | imm16 | rj | rd -# ------------------+-----------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ADDU16I_D: -# CHECK-ENC: 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addu16i.d $a0, $a1, 23 -name: test_ADDU16I_D -body: | - bb.0: - $r4 = ADDU16I_D $r5, 23 -... ---- -# CHECK-LABEL: test_JIRL: -# CHECK-ENC: 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: jirl $a0, $a1, 196 -name: test_JIRL -body: | - bb.0: - $r4 = JIRL $r5, 196 -... ---- -# CHECK-LABEL: test_BEQ: -# CHECK-ENC: 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: beq $a0, $a1, 784 -name: test_BEQ -body: | - bb.0: - BEQ $r4, $r5, 784 -... ---- -# CHECK-LABEL: test_BNE: -# CHECK-ENC: 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bne $a0, $a1, 76 -name: test_BNE -body: | - bb.0: - BNE $r4, $r5, 76 -... 
---- -# CHECK-LABEL: test_BLT: -# CHECK-ENC: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: blt $a0, $a1, 492 -name: test_BLT -body: | - bb.0: - BLT $r4, $r5, 492 -... ---- -# CHECK-LABEL: test_BGE: -# CHECK-ENC: 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bge $a0, $a1, 48 -name: test_BGE -body: | - bb.0: - BGE $r4, $r5, 48 -... ---- -# CHECK-LABEL: test_BLTU: -# CHECK-ENC: 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bltu $a0, $a1, 68 -name: test_BLTU -body: | - bb.0: - BLTU $r4, $r5, 68 -... ---- -# CHECK-LABEL: test_BGEU: -# CHECK-ENC: 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bgeu $a0, $a1, 352 -name: test_BGEU -body: | - bb.0: - BGEU $r4, $r5, 352 diff --git a/llvm/test/CodeGen/LoongArch/3r.mir b/llvm/test/CodeGen/LoongArch/3r.mir deleted file mode 100644 index a1b97d5637b22d6f4a3d0ad829ee1debec7e4209..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/3r.mir +++ /dev/null @@ -1,995 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3R -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | rd -# 
---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ADD_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: add.w $a0, $a1, $a0 -name: test_ADD_W -body: | - bb.0: - $r4 = ADD_W $r5, $r4 -... ---- -# CHECK-LABEL: test_ADD_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: add.d $a0, $a1, $a0 -name: test_ADD_D -body: | - bb.0: - $r4 = ADD_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SUB_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sub.w $a0, $a1, $a0 -name: test_SUB_W -body: | - bb.0: - $r4 = SUB_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SUB_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sub.d $a0, $a1, $a0 -name: test_SUB_D -body: | - bb.0: - $r4 = SUB_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SLT: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slt $a0, $a1, $a0 -name: test_SLT -body: | - bb.0: - $r4 = SLT $r5, $r4 -... ---- -# CHECK-LABEL: test_SLTU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sltu $a0, $a1, $a0 -name: test_SLTU -body: | - bb.0: - $r4 = SLTU $r5, $r4 -... ---- -# CHECK-LABEL: test_MASKEQZ: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: maskeqz $a0, $a1, $a0 -name: test_MASKEQZ -body: | - bb.0: - $r4 = MASKEQZ $r5, $r4 -... ---- -# CHECK-LABEL: test_MASKNEZ: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: masknez $a0, $a1, $a0 -name: test_MASKNEZ -body: | - bb.0: - $r4 = MASKNEZ $r5, $r4 -... ---- -# CHECK-LABEL: test_NOR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: nor $a0, $a1, $a0 -name: test_NOR -body: | - bb.0: - $r4 = NOR $r5, $r4 -... 
---- -# CHECK-LABEL: test_AND: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: and $a0, $a1, $a0 -name: test_AND -body: | - bb.0: - $r4 = AND $r5, $r4 -... ---- -# CHECK-LABEL: test_OR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: or $a0, $a1, $a0 -name: test_OR -body: | - bb.0: - $r4 = OR $r5, $r4 -... ---- -# CHECK-LABEL: test_XOR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: xor $a0, $a1, $a0 -name: test_XOR -body: | - bb.0: - $r4 = XOR $r5, $r4 -... ---- -# CHECK-LABEL: test_ORN: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: orn $a0, $a1, $a0 -name: test_ORN -body: | - bb.0: - $r4 = ORN $r5, $r4 -... ---- -# CHECK-LABEL: test_ANDN: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: andn $a0, $a1, $a0 -name: test_ANDN -body: | - bb.0: - $r4 = ANDN $r5, $r4 -... ---- -# CHECK-LABEL: test_SLL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sll.w $a0, $a1, $a0 -name: test_SLL_W -body: | - bb.0: - $r4 = SLL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SRL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srl.w $a0, $a1, $a0 -name: test_SRL_W -body: | - bb.0: - $r4 = SRL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SRA_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sra.w $a0, $a1, $a0 -name: test_SRA_W -body: | - bb.0: - $r4 = SRA_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SLL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sll.d $a0, $a1, $a0 -name: test_SLL_D -body: | - bb.0: - $r4 = SLL_D $r5, $r4 -... 
---- -# CHECK-LABEL: test_SRL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srl.d $a0, $a1, $a0 -name: test_SRL_D -body: | - bb.0: - $r4 = SRL_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SRA_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sra.d $a0, $a1, $a0 -name: test_SRA_D -body: | - bb.0: - $r4 = SRA_D $r5, $r4 -... ---- -# CHECK-LABEL: test_ROTR_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotr.w $a0, $a1, $a0 -name: test_ROTR_W -body: | - bb.0: - $r4 = ROTR_W $r5, $r4 -... ---- -# CHECK-LABEL: test_ROTR_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotr.d $a0, $a1, $a0 -name: test_ROTR_D -body: | - bb.0: - $r4 = ROTR_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MUL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mul.w $a0, $a1, $a0 -name: test_MUL_W -body: | - bb.0: - $r4 = MUL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.w $a0, $a1, $a0 -name: test_MULH_W -body: | - bb.0: - $r4 = MULH_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.wu $a0, $a1, $a0 -name: test_MULH_WU -body: | - bb.0: - $r4 = MULH_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_MUL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mul.d $a0, $a1, $a0 -name: test_MUL_D -body: | - bb.0: - $r4 = MUL_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.d $a0, $a1, $a0 -name: test_MULH_D -body: | - bb.0: - $r4 = MULH_D $r5, $r4 -... 
---- -# CHECK-LABEL: test_MULH_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.du $a0, $a1, $a0 -name: test_MULH_DU -body: | - bb.0: - $r4 = MULH_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_MULW_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulw.d.w $a0, $a1, $a0 -name: test_MULW_D_W -body: | - bb.0: - $r4 = MULW_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULW_D_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulw.d.wu $a0, $a1, $a0 -name: test_MULW_D_WU -body: | - bb.0: - $r4 = MULW_D_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.w $a0, $a1, $a0 -name: test_DIV_W -body: | - bb.0: - $r4 = DIV_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.w $a0, $a1, $a0 -name: test_MOD_W -body: | - bb.0: - $r4 = MOD_W $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.wu $a0, $a1, $a0 -name: test_DIV_WU -body: | - bb.0: - $r4 = DIV_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.wu $a0, $a1, $a0 -name: test_MOD_WU -body: | - bb.0: - $r4 = MOD_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.d $a0, $a1, $a0 -name: test_DIV_D -body: | - bb.0: - $r4 = DIV_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.d $a0, $a1, $a0 -name: test_MOD_D -body: | - bb.0: - $r4 = MOD_D $r5, $r4 -... 
---- -# CHECK-LABEL: test_DIV_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.du $a0, $a1, $a0 -name: test_DIV_DU -body: | - bb.0: - $r4 = DIV_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.du $a0, $a1, $a0 -name: test_MOD_DU -body: | - bb.0: - $r4 = MOD_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_B_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.b.w $a0, $a1, $a0 -name: test_CRC_W_B_W -body: | - bb.0: - $r4 = CRC_W_B_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_H_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.h.w $a0, $a1, $a0 -name: test_CRC_W_H_W -body: | - bb.0: - $r4 = CRC_W_H_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_W_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.w.w $a0, $a1, $a0 -name: test_CRC_W_W_W -body: | - bb.0: - $r4 = CRC_W_W_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.d.w $a0, $a1, $a0 -name: test_CRC_W_D_W -body: | - bb.0: - $r4 = CRC_W_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_B_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.b.w $a0, $a1, $a0 -name: test_CRCC_W_B_W -body: | - bb.0: - $r4 = CRCC_W_B_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_H_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.h.w $a0, $a1, $a0 -name: test_CRCC_W_H_W -body: | - bb.0: - $r4 = CRCC_W_H_W $r5, $r4 -... 
---- -# CHECK-LABEL: test_CRCC_W_W_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.w.w $a0, $a1, $a0 -name: test_CRCC_W_W_W -body: | - bb.0: - $r4 = CRCC_W_W_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.d.w $a0, $a1, $a0 -name: test_CRCC_W_D_W -body: | - bb.0: - $r4 = CRCC_W_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_AMSWAP_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap_db.w $a0, $a1, $a2 -name: test_AMSWAP_DB_W -body: | - bb.0: - $r4 = AMSWAP_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap_db.d $a0, $a1, $a2 -name: test_AMSWAP_DB_D -body: | - bb.0: - $r4 = AMSWAP_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd_db.w $a0, $a1, $a2 -name: test_AMADD_DB_W -body: | - bb.0: - $r4 = AMADD_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd_db.d $a0, $a1, $a2 -name: test_AMADD_DB_D -body: | - bb.0: - $r4 = AMADD_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand_db.w $a0, $a1, $a2 -name: test_AMAND_DB_W -body: | - bb.0: - $r4 = AMAND_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand_db.d $a0, $a1, $a2 -name: test_AMAND_DB_D -body: | - bb.0: - $r4 = AMAND_DB_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMOR_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor_db.w $a0, $a1, $a2 -name: test_AMOR_DB_W -body: | - bb.0: - $r4 = AMOR_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor_db.d $a0, $a1, $a2 -name: test_AMOR_DB_D -body: | - bb.0: - $r4 = AMOR_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor_db.w $a0, $a1, $a2 -name: test_AMXOR_DB_W -body: | - bb.0: - $r4 = AMXOR_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor_db.d $a0, $a1, $a2 -name: test_AMXOR_DB_D -body: | - bb.0: - $r4 = AMXOR_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.w $a0, $a1, $a2 -name: test_AMMAX_DB_W -body: | - bb.0: - $r4 = AMMAX_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.d $a0, $a1, $a2 -name: test_AMMAX_DB_D -body: | - bb.0: - $r4 = AMMAX_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.w $a0, $a1, $a2 -name: test_AMMIN_DB_W -body: | - bb.0: - $r4 = AMMIN_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.d $a0, $a1, $a2 -name: test_AMMIN_DB_D -body: | - bb.0: - $r4 = AMMIN_DB_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMMAX_DB_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.wu $a0, $a1, $a2 -name: test_AMMAX_DB_WU -body: | - bb.0: - $r4 = AMMAX_DB_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.du $a0, $a1, $a2 -name: test_AMMAX_DB_DU -body: | - bb.0: - $r4 = AMMAX_DB_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.wu $a0, $a1, $a2 -name: test_AMMIN_DB_WU -body: | - bb.0: - $r4 = AMMIN_DB_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.du $a0, $a1, $a2 -name: test_AMMIN_DB_DU -body: | - bb.0: - $r4 = AMMIN_DB_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap.w $a0, $a1, $a2 -name: test_AMSWAP_W -body: | - bb.0: - $r4 = AMSWAP_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap.d $a0, $a1, $a2 -name: test_AMSWAP_D -body: | - bb.0: - $r4 = AMSWAP_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd.w $a0, $a1, $a2 -name: test_AMADD_W -body: | - bb.0: - $r4 = AMADD_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd.d $a0, $a1, $a2 -name: test_AMADD_D -body: | - bb.0: - $r4 = AMADD_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMAND_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand.w $a0, $a1, $a2 -name: test_AMAND_W -body: | - bb.0: - $r4 = AMAND_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand.d $a0, $a1, $a2 -name: test_AMAND_D -body: | - bb.0: - $r4 = AMAND_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor.w $a0, $a1, $a2 -name: test_AMOR_W -body: | - bb.0: - $r4 = AMOR_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor.d $a0, $a1, $a2 -name: test_AMOR_D -body: | - bb.0: - $r4 = AMOR_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor.w $a0, $a1, $a2 -name: test_AMXOR_W -body: | - bb.0: - $r4 = AMXOR_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor.d $a0, $a1, $a2 -name: test_AMXOR_D -body: | - bb.0: - $r4 = AMXOR_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.w $a0, $a1, $a2 -name: test_AMMAX_W -body: | - bb.0: - $r4 = AMMAX_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.d $a0, $a1, $a2 -name: test_AMMAX_D -body: | - bb.0: - $r4 = AMMAX_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.w $a0, $a1, $a2 -name: test_AMMIN_W -body: | - bb.0: - $r4 = AMMIN_W $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMMIN_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.d $a0, $a1, $a2 -name: test_AMMIN_D -body: | - bb.0: - $r4 = AMMIN_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.wu $a0, $a1, $a2 -name: test_AMMAX_WU -body: | - bb.0: - $r4 = AMMAX_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.du $a0, $a1, $a2 -name: test_AMMAX_DU -body: | - bb.0: - $r4 = AMMAX_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.wu $a0, $a1, $a2 -name: test_AMMIN_WU -body: | - bb.0: - $r4 = AMMIN_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.du $a0, $a1, $a2 -name: test_AMMIN_DU -body: | - bb.0: - $r4 = AMMIN_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.b $a0, $a1, $a2 -name: test_LDX_B -body: | - bb.0: - $r4 = LDX_B $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.h $a0, $a1, $a2 -name: test_LDX_H -body: | - bb.0: - $r4 = LDX_H $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.w $a0, $a1, $a2 -name: test_LDX_W -body: | - bb.0: - $r4 = LDX_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.d $a0, $a1, $a2 -name: test_LDX_D -body: | - bb.0: - $r4 = LDX_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_LDX_BU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.bu $a0, $a1, $a2 -name: test_LDX_BU -body: | - bb.0: - $r4 = LDX_BU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_HU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.hu $a0, $a1, $a2 -name: test_LDX_HU -body: | - bb.0: - $r4 = LDX_HU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.wu $a0, $a1, $a2 -name: test_LDX_WU -body: | - bb.0: - $r4 = LDX_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.b $a0, $a1, $a2 -name: test_LDGT_B -body: | - bb.0: - $r4 = LDGT_B $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.h $a0, $a1, $a2 -name: test_LDGT_H -body: | - bb.0: - $r4 = LDGT_H $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.w $a0, $a1, $a2 -name: test_LDGT_W -body: | - bb.0: - $r4 = LDGT_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.d $a0, $a1, $a2 -name: test_LDGT_D -body: | - bb.0: - $r4 = LDGT_D $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.b $a0, $a1, $a2 -name: test_LDLE_B -body: | - bb.0: - $r4 = LDLE_B $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.h $a0, $a1, $a2 -name: test_LDLE_H -body: | - bb.0: - $r4 = LDLE_H $r5, $r6 -... 
---- -# CHECK-LABEL: test_LDLE_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.w $a0, $a1, $a2 -name: test_LDLE_W -body: | - bb.0: - $r4 = LDLE_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.d $a0, $a1, $a2 -name: test_LDLE_D -body: | - bb.0: - $r4 = LDLE_D $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.b $a0, $a1, $a2 -name: test_STX_B -body: | - bb.0: - STX_B $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.h $a0, $a1, $a2 -name: test_STX_H -body: | - bb.0: - STX_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.w $a0, $a1, $a2 -name: test_STX_W -body: | - bb.0: - STX_W $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.d $a0, $a1, $a2 -name: test_STX_D -body: | - bb.0: - STX_D $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.b $a0, $a1, $a2 -name: test_STGT_B -body: | - bb.0: - STGT_B $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.h $a0, $a1, $a2 -name: test_STGT_H -body: | - bb.0: - STGT_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.w $a0, $a1, $a2 -name: test_STGT_W -body: | - bb.0: - STGT_W $r4, $r5, $r6 -... 
---- -# CHECK-LABEL: test_STGT_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.d $a0, $a1, $a2 -name: test_STGT_D -body: | - bb.0: - STGT_D $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.b $a0, $a1, $a2 -name: test_STLE_B -body: | - bb.0: - STLE_B $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.h $a0, $a1, $a2 -name: test_STLE_H -body: | - bb.0: - STLE_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.w $a0, $a1, $a2 -name: test_STLE_W -body: | - bb.0: - STLE_W $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.d $a0, $a1, $a2 -name: test_STLE_D -body: | - bb.0: - STLE_D $r4, $r5, $r6 diff --git a/llvm/test/CodeGen/LoongArch/3ri.mir b/llvm/test/CodeGen/LoongArch/3ri.mir deleted file mode 100644 index c86e141896176bbaf7d337af9c79a3d33adefed0..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/3ri.mir +++ /dev/null @@ -1,69 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3RI2 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------+-----+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 
18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------+-----+--------------+--------------+--------------- -# opcode |imm2 | rk | rj | rd -# ---------------------------------------------+-----+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ALSL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.w $a0, $a1, $a2, 4 -name: test_ALSL_W -body: | - bb.0: - $r4 = ALSL_W $r5, $r6, 4 -... ---- -# CHECK-LABEL: test_ALSL_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.wu $a0, $a1, $a2, 2 -name: test_ALSL_WU -body: | - bb.0: - $r4 = ALSL_WU $r5, $r6, 2 -... ---- -# CHECK-LABEL: test_ALSL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.d $a0, $a1, $a2, 4 -name: test_ALSL_D -body: | - bb.0: - $r4 = ALSL_D $r5, $r6, 4 -... ---- -# CHECK-LABEL: test_BYTEPICK_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bytepick.w $a0, $a1, $a2, 0 -name: test_BYTEPICK_W -body: | - bb.0: - $r4 = BYTEPICK_W $r5, $r6, 0 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3RI3 -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------+--------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------+--------+--------------+--------------+--------------- -# opcode | imm3 | rk | rj | rd -# ------------------------------------------+--------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BYTEPICK_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bytepick.d $a0, $a1, $a2, 4 -name: test_BYTEPICK_D -body: | - bb.0: - $r4 = BYTEPICK_D $r5, $r6, 4 diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll new file mode 100644 index 0000000000000000000000000000000000000000..b8d53ea57ff4fa6df428177dfd85af105fac4a07 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -0,0 +1,68 @@ +;; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each +;; pass. Ignore it with 'grep -v'. 
+; RUN: llc --mtriple=loongarch32 -O0 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch64 -O0 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information +; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Profile summary info +; CHECK-NEXT: Machine Branch Probability Analysis +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Pre-ISel Intrinsic Lowering +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Lower Garbage Collection Instructions +; CHECK-NEXT: Shadow Stack GC Lowering +; CHECK-NEXT: Lower constant intrinsics +; CHECK-NEXT: Remove unreachable blocks from the CFG +; CHECK-NEXT: Expand vector predication intrinsics +; CHECK-NEXT: Scalarize Masked Memory Intrinsics +; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Safe Stack instrumentation pass +; CHECK-NEXT: Insert stack protectors +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: LoongArch DAG->DAG Pattern Instruction Selection +; CHECK-NEXT: Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: LoongArch Pre-RA pseudo instruction expansion pass +; 
CHECK-NEXT: Eliminate PHI nodes for register allocation +; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: Fast Register Allocator +; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis +; CHECK-NEXT: Fixup Statepoint Caller Saved +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: Analyze Machine Code For Garbage Collection +; CHECK-NEXT: Insert fentry calls +; CHECK-NEXT: Insert XRay ops +; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: StackMap Liveness Analysis +; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: LoongArch Assembly Printer +; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll new file mode 100644 index 0000000000000000000000000000000000000000..ca6508efd94e905f8f9300d15678890f561f5358 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/alloca.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare void @notdead(ptr) + +;; These tests must ensure the stack pointer is restored using the frame +;; pointer + +define void @simple_alloca(i32 %n) nounwind { +; LA32-LABEL: simple_alloca: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; 
LA32-NEXT: addi.w $fp, $sp, 16 +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: addi.w $a1, $zero, -16 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: sub.w $a0, $sp, $a0 +; LA32-NEXT: move $sp, $a0 +; LA32-NEXT: bl %plt(notdead) +; LA32-NEXT: addi.w $sp, $fp, -16 +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: simple_alloca: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-NEXT: addi.d $fp, $sp, 16 +; LA64-NEXT: addi.w $a1, $zero, -16 +; LA64-NEXT: lu32i.d $a1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: addi.d $a0, $a0, 15 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: sub.d $a0, $sp, $a0 +; LA64-NEXT: move $sp, $a0 +; LA64-NEXT: bl %plt(notdead) +; LA64-NEXT: addi.d $sp, $fp, -16 +; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, i32 %n + call void @notdead(ptr %1) + ret void +} + +declare ptr @llvm.stacksave() +declare void @llvm.stackrestore(ptr) + +define void @scoped_alloca(i32 %n) nounwind { +; LA32-LABEL: scoped_alloca: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 16 +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: addi.w $a1, $zero, -16 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: move $s0, $sp +; LA32-NEXT: sub.w $a0, $sp, $a0 +; LA32-NEXT: move $sp, $a0 +; LA32-NEXT: bl %plt(notdead) +; LA32-NEXT: move $sp, $s0 +; LA32-NEXT: addi.w $sp, $fp, -16 +; LA32-NEXT: ld.w $s0, $sp, 4 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 
12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: scoped_alloca: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: addi.d $fp, $sp, 32 +; LA64-NEXT: addi.w $a1, $zero, -16 +; LA64-NEXT: lu32i.d $a1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: addi.d $a0, $a0, 15 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: move $s0, $sp +; LA64-NEXT: sub.d $a0, $sp, $a0 +; LA64-NEXT: move $sp, $a0 +; LA64-NEXT: bl %plt(notdead) +; LA64-NEXT: move $sp, $s0 +; LA64-NEXT: addi.d $sp, $fp, -32 +; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + %sp = call ptr @llvm.stacksave() + %addr = alloca i8, i32 %n + call void @notdead(ptr %addr) + call void @llvm.stackrestore(ptr %sp) + ret void +} + +declare void @func(ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) + +;; Check that outgoing arguments passed on the stack do not corrupt a +;; variable-sized stack object. 
+define void @alloca_callframe(i32 %n) nounwind { +; LA32-LABEL: alloca_callframe: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 16 +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: addi.w $a1, $zero, -16 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: sub.w $a0, $sp, $a0 +; LA32-NEXT: move $sp, $a0 +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: ori $a1, $zero, 12 +; LA32-NEXT: st.w $a1, $sp, 12 +; LA32-NEXT: ori $a1, $zero, 11 +; LA32-NEXT: st.w $a1, $sp, 8 +; LA32-NEXT: ori $a1, $zero, 10 +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: ori $a1, $zero, 9 +; LA32-NEXT: st.w $a1, $sp, 0 +; LA32-NEXT: ori $a1, $zero, 2 +; LA32-NEXT: ori $a2, $zero, 3 +; LA32-NEXT: ori $a3, $zero, 4 +; LA32-NEXT: ori $a4, $zero, 5 +; LA32-NEXT: ori $a5, $zero, 6 +; LA32-NEXT: ori $a6, $zero, 7 +; LA32-NEXT: ori $a7, $zero, 8 +; LA32-NEXT: bl %plt(func) +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: addi.w $sp, $fp, -16 +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: alloca_callframe: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-NEXT: addi.d $fp, $sp, 16 +; LA64-NEXT: addi.w $a1, $zero, -16 +; LA64-NEXT: lu32i.d $a1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: addi.d $a0, $a0, 15 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: sub.d $a0, $sp, $a0 +; LA64-NEXT: move $sp, $a0 +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: ori $a1, $zero, 12 +; LA64-NEXT: st.d $a1, $sp, 24 +; LA64-NEXT: ori $a1, $zero, 11 +; LA64-NEXT: st.d $a1, $sp, 16 +; LA64-NEXT: ori $a1, $zero, 10 +; LA64-NEXT: st.d $a1, $sp, 8 +; LA64-NEXT: ori $a1, $zero, 9 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: ori $a1, 
$zero, 2 +; LA64-NEXT: ori $a2, $zero, 3 +; LA64-NEXT: ori $a3, $zero, 4 +; LA64-NEXT: ori $a4, $zero, 5 +; LA64-NEXT: ori $a5, $zero, 6 +; LA64-NEXT: ori $a6, $zero, 7 +; LA64-NEXT: ori $a7, $zero, 8 +; LA64-NEXT: bl %plt(func) +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: addi.d $sp, $fp, -16 +; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, i32 %n + call void @func(ptr %1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, + i32 9, i32 10, i32 11, i32 12) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll new file mode 100644 index 0000000000000000000000000000000000000000..e296dabb38362544cf5726674a0c95ed13dc5134 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/alsl.ll @@ -0,0 +1,363 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define i8 @alsl_i8(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: alsl_i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 1 +; LA64-NEXT: ret +entry: + %mul = mul nsw i8 %a, 2 + %add = add nsw i8 %b, %mul + ret i8 %add +} + +define i16 @alsl_i16(i16 signext %a, i16 signext %b) nounwind { +; LA32-LABEL: alsl_i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 2 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 2 +; LA64-NEXT: ret +entry: + %mul = mul nsw i16 %a, 4 + %add = add nsw i16 %b, %mul + ret i16 %add +} + +define i32 @alsl_i32(i32 signext %a, i32 signext %b) nounwind { +; LA32-LABEL: alsl_i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; 
LA32-NEXT: ret +; +; LA64-LABEL: alsl_i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: ret +entry: + %mul = mul nsw i32 %a, 8 + %add = add nsw i32 %b, %mul + ret i32 %add +} + +define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind { +; LA32-LABEL: alsl_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: slli.w $a1, $a1, 4 +; LA32-NEXT: srli.w $a4, $a0, 28 +; LA32-NEXT: or $a1, $a1, $a4 +; LA32-NEXT: add.w $a1, $a3, $a1 +; LA32-NEXT: alsl.w $a0, $a0, $a2, 4 +; LA32-NEXT: sltu $a2, $a0, $a2 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 4 +; LA64-NEXT: ret +entry: + %mul = mul nsw i64 %a, 16 + %add = add nsw i64 %b, %mul + ret i64 %add +} + +define i32 @alsl_zext_i8(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: alsl_zext_i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 1 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_zext_i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 1 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: ret +entry: + %mul = mul nsw i8 %a, 2 + %add = add nsw i8 %b, %mul + %zext = zext i8 %add to i32 + ret i32 %zext +} + +define i32 @alsl_zext_i16(i16 signext %a, i16 signext %b) nounwind { +; LA32-LABEL: alsl_zext_i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 2 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_zext_i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a1, 2 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i16 %a, 4 + %add = add nsw i16 %b, %mul + %zext = zext i16 %add to i32 + ret i32 %zext +} + +define i64 @alsl_zext_i32(i32 signext %a, i32 signext %b) nounwind { +; LA32-LABEL: alsl_zext_i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_zext_i32: 
+; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.wu $a0, $a0, $a1, 3 +; LA64-NEXT: ret +entry: + %mul = mul nsw i32 %a, 8 + %add = add nsw i32 %b, %mul + %zext = zext i32 %add to i64 + ret i64 %zext +} + +define i8 @mul_add_i8(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: mul_add_i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 1 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 1 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i8 %a, 3 + %add = add nsw i8 %b, %mul + ret i8 %add +} + +define i16 @mul_add_i16(i16 signext %a, i16 signext %b) nounwind { +; LA32-LABEL: mul_add_i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a2, $zero, 10 +; LA32-NEXT: mul.w $a0, $a0, $a2 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ori $a2, $zero, 10 +; LA64-NEXT: mul.d $a0, $a0, $a2 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i16 %a, 10 + %add = add nsw i16 %b, %mul + ret i16 %add +} + +define i32 @mul_add_i32(i32 signext %a, i32 signext %b) nounwind { +; LA32-LABEL: mul_add_i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a2, $zero, 12 +; LA32-NEXT: mul.w $a0, $a0, $a2 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ori $a2, $zero, 12 +; LA64-NEXT: mul.d $a0, $a0, $a2 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i32 %a, 12 + %add = add nsw i32 %b, %mul + ret i32 %add +} + +define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind { +; LA32-LABEL: mul_add_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: slli.w $a4, $a1, 4 +; LA32-NEXT: sub.w $a1, $a4, $a1 +; LA32-NEXT: ori $a4, $zero, 15 +; LA32-NEXT: mulh.wu $a4, $a0, $a4 +; LA32-NEXT: add.w $a1, $a4, $a1 +; LA32-NEXT: add.w $a1, $a3, $a1 +; LA32-NEXT: slli.w 
$a3, $a0, 4 +; LA32-NEXT: sub.w $a0, $a3, $a0 +; LA32-NEXT: add.w $a0, $a2, $a0 +; LA32-NEXT: sltu $a2, $a0, $a2 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: slli.d $a2, $a0, 4 +; LA64-NEXT: sub.d $a0, $a2, $a0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i64 %a, 15 + %add = add nsw i64 %b, %mul + ret i64 %add +} + +define i32 @mul_add_zext_i8(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: mul_add_zext_i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 2 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_zext_i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 2 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: ret +entry: + %mul = mul nsw i8 %a, 5 + %add = add nsw i8 %b, %mul + %zext = zext i8 %add to i32 + ret i32 %zext +} + +define i32 @mul_add_zext_i16(i16 signext %a, i16 signext %b) nounwind { +; LA32-LABEL: mul_add_zext_i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: slli.w $a2, $a0, 4 +; LA32-NEXT: sub.w $a0, $a2, $a0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_zext_i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: slli.d $a2, $a0, 4 +; LA64-NEXT: sub.d $a0, $a2, $a0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i16 %a, 15 + %add = add nsw i16 %b, %mul + %zext = zext i16 %add to i32 + ret i32 %zext +} + +define i64 @mul_add_zext_i32(i32 signext %a, i32 signext %b) nounwind { +; LA32-LABEL: mul_add_zext_i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 2 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_zext_i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 2 +; LA64-NEXT: add.d $a0, $a1, 
$a0 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i32 %a, 5 + %add = add nsw i32 %b, %mul + %zext = zext i32 %add to i64 + ret i64 %zext +} + +define i8 @alsl_neg_i8(i8 signext %a, i8 signext %b) nounwind { +; LA32-LABEL: alsl_neg_i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 1 +; LA32-NEXT: sub.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_neg_i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 1 +; LA64-NEXT: sub.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i8 %a, -3 + %add = add nsw i8 %b, %mul + ret i8 %add +} + +define i16 @alsl_neg_i16(i16 signext %a, i16 signext %b) nounwind { +; LA32-LABEL: alsl_neg_i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 2 +; LA32-NEXT: sub.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_neg_i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 2 +; LA64-NEXT: sub.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i16 %a, -5 + %add = add nsw i16 %b, %mul + ret i16 %add +} + +define i32 @alsl_neg_i32(i32 signext %a, i32 signext %b) nounwind { +; LA32-LABEL: alsl_neg_i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: alsl.w $a0, $a0, $a0, 3 +; LA32-NEXT: sub.w $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: alsl_neg_i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: alsl.d $a0, $a0, $a0, 3 +; LA64-NEXT: sub.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i32 %a, -9 + %add = add nsw i32 %b, %mul + ret i32 %add +} + +define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind { +; LA32-LABEL: mul_add_neg_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: slli.w $a4, $a1, 4 +; LA32-NEXT: sub.w $a1, $a1, $a4 +; LA32-NEXT: addi.w $a4, $zero, -15 +; LA32-NEXT: mulh.wu $a4, $a0, $a4 +; LA32-NEXT: sub.w $a4, $a4, $a0 +; LA32-NEXT: add.w $a1, $a4, $a1 +; LA32-NEXT: add.w $a1, $a3, $a1 +; LA32-NEXT: slli.w $a3, $a0, 4 +; LA32-NEXT: sub.w $a0, $a0, $a3 +; LA32-NEXT: add.w $a0, $a2, $a0 +; 
LA32-NEXT: sltu $a2, $a0, $a2 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: mul_add_neg_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: slli.d $a2, $a0, 4 +; LA64-NEXT: sub.d $a0, $a0, $a2 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret +entry: + %mul = mul nsw i64 %a, -15 + %add = add nsw i64 %b, %mul + ret i64 %add +} diff --git a/llvm/test/CodeGen/LoongArch/analyze-branch.ll b/llvm/test/CodeGen/LoongArch/analyze-branch.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb89964af838e8c51b7db2d20c81922c5b098c9c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/analyze-branch.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +;; This test checks that LLVM can do basic stripping and reapplying of branches +;; to basic blocks. + +declare void @test_true() +declare void @test_false() + +;; !0 corresponds to a branch being taken, !1 to not being taken. +!0 = !{!"branch_weights", i32 64, i32 4} +!1 = !{!"branch_weights", i32 4, i32 64} + +define void @test_bcc_fallthrough_taken(i64 %in) nounwind { +; CHECK-LABEL: test_bcc_fallthrough_taken: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a1, $zero, 42 +; CHECK-NEXT: bne $a0, $a1, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: bl %plt(test_true) +; CHECK-NEXT: .LBB0_2: # %true +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %false +; CHECK-NEXT: bl %plt(test_false) +; CHECK-NEXT: b .LBB0_2 + %tst = icmp eq i64 %in, 42 + br i1 %tst, label %true, label %false, !prof !0 + +;; Expected layout order is: Entry, TrueBlock, FalseBlock +;; Entry->TrueBlock is the common path, which should be taken whenever the +;; conditional branch is false. 
+ +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_bcc_fallthrough_nottaken(i64 %in) nounwind { +; CHECK-LABEL: test_bcc_fallthrough_nottaken: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a1, $zero, 42 +; CHECK-NEXT: beq $a0, $a1, .LBB1_3 +; CHECK-NEXT: # %bb.1: # %false +; CHECK-NEXT: bl %plt(test_false) +; CHECK-NEXT: .LBB1_2: # %true +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_3: # %true +; CHECK-NEXT: bl %plt(test_true) +; CHECK-NEXT: b .LBB1_2 + %tst = icmp eq i64 %in, 42 + br i1 %tst, label %true, label %false, !prof !1 + +;; Expected layout order is: Entry, FalseBlock, TrueBlock +;; Entry->FalseBlock is the common path, which should be taken whenever the +;; conditional branch is false. + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll new file mode 100644 index 0000000000000000000000000000000000000000..8cc731eaa7a54f9134afb9f07c3ae0d9f631d0a4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare i7 @llvm.bitreverse.i7(i7) +declare i8 @llvm.bitreverse.i8(i8) +declare i16 @llvm.bitreverse.i16(i16) +declare i24 @llvm.bitreverse.i24(i24) +declare i32 @llvm.bitreverse.i32(i32) +declare i48 @llvm.bitreverse.i48(i48) +declare i64 @llvm.bitreverse.i64(i64) +declare i77 @llvm.bitreverse.i77(i77) +declare i128 @llvm.bitreverse.i128(i128) + +define i8 
@test_bitreverse_i8(i8 %a) nounwind { +; LA32-LABEL: test_bitreverse_i8: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.4b $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i8: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.4b $a0, $a0 +; LA64-NEXT: ret + %tmp = call i8 @llvm.bitreverse.i8(i8 %a) + ret i8 %tmp +} + +define i16 @test_bitreverse_i16(i16 %a) nounwind { +; LA32-LABEL: test_bitreverse_i16: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: srli.w $a0, $a0, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i16: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 48 +; LA64-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + ret i16 %tmp +} + +define i32 @test_bitreverse_i32(i32 %a) nounwind { +; LA32-LABEL: test_bitreverse_i32: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i32: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.w $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %tmp +} + +define i64 @test_bitreverse_i64(i64 %a) nounwind { +; LA32-LABEL: test_bitreverse_i64: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a2, $a1 +; LA32-NEXT: bitrev.w $a1, $a0 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i64: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %tmp +} + +;; Bitreverse on non-native integer widths. 
+ +define i7 @test_bitreverse_i7(i7 %a) nounwind { +; LA32-LABEL: test_bitreverse_i7: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: srli.w $a0, $a0, 25 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i7: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 57 +; LA64-NEXT: ret + %tmp = call i7 @llvm.bitreverse.i7(i7 %a) + ret i7 %tmp +} + +define i24 @test_bitreverse_i24(i24 %a) nounwind { +; LA32-LABEL: test_bitreverse_i24: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: srli.w $a0, $a0, 8 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i24: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 40 +; LA64-NEXT: ret + %tmp = call i24 @llvm.bitreverse.i24(i24 %a) + ret i24 %tmp +} + +define i48 @test_bitreverse_i48(i48 %a) nounwind { +; LA32-LABEL: test_bitreverse_i48: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.w $a1, $a1 +; LA32-NEXT: srli.w $a1, $a1, 16 +; LA32-NEXT: bitrev.w $a2, $a0 +; LA32-NEXT: slli.w $a0, $a2, 16 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: srli.w $a1, $a2, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i48: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 16 +; LA64-NEXT: ret + %tmp = call i48 @llvm.bitreverse.i48(i48 %a) + ret i48 %tmp +} + +define i77 @test_bitreverse_i77(i77 %a) nounwind { +; LA32-LABEL: test_bitreverse_i77: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: bitrev.w $a2, $a2 +; LA32-NEXT: ld.w $a3, $a1, 4 +; LA32-NEXT: bitrev.w $a3, $a3 +; LA32-NEXT: srli.w $a4, $a3, 19 +; LA32-NEXT: slli.w $a5, $a2, 13 +; LA32-NEXT: or $a4, $a5, $a4 +; LA32-NEXT: srli.w $a2, $a2, 19 +; LA32-NEXT: st.h $a2, $a0, 8 +; LA32-NEXT: st.w $a4, $a0, 4 +; LA32-NEXT: slli.w $a2, $a3, 13 +; LA32-NEXT: ld.w $a1, $a1, 8 +; LA32-NEXT: bitrev.w $a1, $a1 +; LA32-NEXT: srli.w $a1, $a1, 19 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i77: +; 
LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a1, $a1 +; LA64-NEXT: srli.d $a1, $a1, 51 +; LA64-NEXT: bitrev.d $a2, $a0 +; LA64-NEXT: slli.d $a0, $a2, 13 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: srli.d $a1, $a2, 51 +; LA64-NEXT: ret + %tmp = call i77 @llvm.bitreverse.i77(i77 %a) + ret i77 %tmp +} + +define i128 @test_bitreverse_i128(i128 %a) nounwind { +; LA32-LABEL: test_bitreverse_i128: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: bitrev.w $a2, $a2 +; LA32-NEXT: st.w $a2, $a0, 12 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: bitrev.w $a2, $a2 +; LA32-NEXT: st.w $a2, $a0, 8 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: bitrev.w $a2, $a2 +; LA32-NEXT: st.w $a2, $a0, 4 +; LA32-NEXT: ld.w $a1, $a1, 12 +; LA32-NEXT: bitrev.w $a1, $a1 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_i128: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.d $a2, $a1 +; LA64-NEXT: bitrev.d $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %tmp = call i128 @llvm.bitreverse.i128(i128 %a) + ret i128 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/block-address.ll b/llvm/test/CodeGen/LoongArch/block-address.ll new file mode 100644 index 0000000000000000000000000000000000000000..63d310dd9beabcee55ffc9c1f7f7e450e5808b2b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/block-address.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +@addr = dso_local global ptr null + +define void @test_blockaddress() nounwind { +; LA32-LABEL: test_blockaddress: +; LA32: # %bb.0: +; LA32-NEXT: pcalau12i $a0, %pc_hi20(addr) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(addr) +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.Ltmp0) +; LA32-NEXT: addi.w $a1, $a1, %pc_lo12(.Ltmp0) +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: jr $a0 +; LA32-NEXT: .Ltmp0: # Block 
address taken +; LA32-NEXT: .LBB0_1: # %block +; LA32-NEXT: ret +; +; LA64-LABEL: test_blockaddress: +; LA64: # %bb.0: +; LA64-NEXT: pcalau12i $a0, %pc_hi20(addr) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(addr) +; LA64-NEXT: pcalau12i $a1, %pc_hi20(.Ltmp0) +; LA64-NEXT: addi.d $a1, $a1, %pc_lo12(.Ltmp0) +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: jr $a0 +; LA64-NEXT: .Ltmp0: # Block address taken +; LA64-NEXT: .LBB0_1: # %block +; LA64-NEXT: ret + store volatile ptr blockaddress(@test_blockaddress, %block), ptr @addr + %val = load volatile ptr, ptr @addr + indirectbr ptr %val, [label %block] + +block: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/blockaddress-symbol.ll b/llvm/test/CodeGen/LoongArch/blockaddress-symbol.ll new file mode 100644 index 0000000000000000000000000000000000000000..d07092230a4daa9b4b49fa38a1b0c87d5124feb5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/blockaddress-symbol.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --no-integrated-as < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --no-integrated-as < %s | FileCheck %s + +;; This regression test is for ensuring the AsmParser does not use the +;; getOrCreateSymbol interface to create blockaddress symbols. +;; Otherwise incorrect symbols will be created: +;; `.Ltmp0` -> `.Ltmp00`. 
+ +define void @operand_block_address() nounwind { +; CHECK-LABEL: operand_block_address: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: b .Ltmp0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: ret + call void asm sideeffect "b $0", "i"(ptr blockaddress(@operand_block_address, %bb)) + br label %bb +bb: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/bnez-beqz.ll b/llvm/test/CodeGen/LoongArch/bnez-beqz.ll new file mode 100644 index 0000000000000000000000000000000000000000..d1652c73c25ebb9cafd17c5a982e04e593f2f362 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bnez-beqz.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +declare void @bar() + +define void @bnez_i32(i32 signext %0) nounwind { +; LA32-LABEL: bnez_i32: +; LA32: # %bb.0: # %start +; LA32-NEXT: beqz $a0, .LBB0_2 +; LA32-NEXT: # %bb.1: # %f +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_2: # %t +; LA32-NEXT: b %plt(bar) +; +; LA64-LABEL: bnez_i32: +; LA64: # %bb.0: # %start +; LA64-NEXT: beqz $a0, .LBB0_2 +; LA64-NEXT: # %bb.1: # %f +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_2: # %t +; LA64-NEXT: b %plt(bar) +start: + %1 = icmp eq i32 %0, 0 + br i1 %1, label %t, label %f + +t: + tail call void @bar() + br label %f + +f: + ret void +} + +define void @beqz_i32(i32 signext %0) nounwind { +; LA32-LABEL: beqz_i32: +; LA32: # %bb.0: # %start +; LA32-NEXT: beqz $a0, .LBB1_2 +; LA32-NEXT: # %bb.1: # %t +; LA32-NEXT: b %plt(bar) +; LA32-NEXT: .LBB1_2: # %f +; LA32-NEXT: ret +; +; LA64-LABEL: beqz_i32: +; LA64: # %bb.0: # %start +; LA64-NEXT: beqz $a0, .LBB1_2 +; LA64-NEXT: # %bb.1: # %t +; LA64-NEXT: b %plt(bar) +; LA64-NEXT: .LBB1_2: # %f +; LA64-NEXT: ret +start: + %1 = icmp ne i32 %0, 0 + br i1 %1, label %t, label %f + +t: + tail call void @bar() + br 
label %f + +f: + ret void +} + +define void @bnez_i64(i64 %0) nounwind { +; LA32-LABEL: bnez_i64: +; LA32: # %bb.0: # %start +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beqz $a0, .LBB2_2 +; LA32-NEXT: # %bb.1: # %f +; LA32-NEXT: ret +; LA32-NEXT: .LBB2_2: # %t +; LA32-NEXT: b %plt(bar) +; +; LA64-LABEL: bnez_i64: +; LA64: # %bb.0: # %start +; LA64-NEXT: beqz $a0, .LBB2_2 +; LA64-NEXT: # %bb.1: # %f +; LA64-NEXT: ret +; LA64-NEXT: .LBB2_2: # %t +; LA64-NEXT: b %plt(bar) +start: + %1 = icmp eq i64 %0, 0 + br i1 %1, label %t, label %f + +t: + tail call void @bar() + br label %f + +f: + ret void +} + +define void @beqz_i64(i64 %0) nounwind { +; LA32-LABEL: beqz_i64: +; LA32: # %bb.0: # %start +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beqz $a0, .LBB3_2 +; LA32-NEXT: # %bb.1: # %t +; LA32-NEXT: b %plt(bar) +; LA32-NEXT: .LBB3_2: # %f +; LA32-NEXT: ret +; +; LA64-LABEL: beqz_i64: +; LA64: # %bb.0: # %start +; LA64-NEXT: beqz $a0, .LBB3_2 +; LA64-NEXT: # %bb.1: # %t +; LA64-NEXT: b %plt(bar) +; LA64-NEXT: .LBB3_2: # %f +; LA64-NEXT: ret +start: + %1 = icmp ne i64 %0, 0 + br i1 %1, label %t, label %f + +t: + tail call void @bar() + br label %f + +f: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll new file mode 100644 index 0000000000000000000000000000000000000000..aa4a602ccc64c666f8f538590c02f4f6da5f2422 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; 
CHECK-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s8, $sp, 4 # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -4 +; CHECK-NEXT: .cfi_offset 22, -8 +; CHECK-NEXT: .cfi_offset 23, -12 +; CHECK-NEXT: .cfi_offset 24, -16 +; CHECK-NEXT: .cfi_offset 25, -20 +; CHECK-NEXT: .cfi_offset 26, -24 +; CHECK-NEXT: .cfi_offset 27, -28 +; CHECK-NEXT: .cfi_offset 28, -32 +; CHECK-NEXT: .cfi_offset 29, -36 +; CHECK-NEXT: .cfi_offset 30, -40 +; CHECK-NEXT: .cfi_offset 31, -44 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $ra, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w 
$t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $fp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.w $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.w $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; 
CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.w $s8, $sp, 4 # 4-byte 
Folded Reload +; CHECK-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 48 +; CHECK-NEXT: ret + %zero = call i32 asm sideeffect "addi.w $$zero, $$zero, 1", "={r0}"() + %ra = call i32 asm sideeffect "addi.w $$ra, $$zero, 1", "={r1}"() + %tp = call i32 asm sideeffect "addi.w $$tp, $$zero, 1", "={r2}"() + %a0 = call i32 asm sideeffect "addi.w $$a0, $$zero, 1", "={r4}"() + %a1 = call i32 asm sideeffect "addi.w $$a1, $$zero, 1", "={r5}"() + %a2 = call i32 asm sideeffect "addi.w $$a2, $$zero, 1", "={r6}"() + %a3 = call i32 asm sideeffect "addi.w $$a3, $$zero, 1", "={r7}"() + %a4 = call i32 asm sideeffect "addi.w $$a4, $$zero, 1", "={r8}"() + %a5 = call i32 asm sideeffect "addi.w $$a5, $$zero, 1", "={r9}"() + %a6 = call i32 asm sideeffect "addi.w $$a6, $$zero, 1", "={r10}"() + %a7 = call i32 asm sideeffect "addi.w $$a7, $$zero, 1", "={r11}"() + %t0 = call i32 asm sideeffect "addi.w $$t0, $$zero, 1", "={r12}"() + %t1 = call i32 asm sideeffect "addi.w $$t1, $$zero, 1", "={r13}"() + %t2 = call i32 asm sideeffect "addi.w $$t2, $$zero, 1", "={r14}"() + %t3 = call i32 asm sideeffect "addi.w $$t3, $$zero, 1", "={r15}"() + %t4 = call i32 asm sideeffect "addi.w $$t4, $$zero, 1", "={r16}"() + %t5 = call i32 asm sideeffect "addi.w $$t5, $$zero, 1", "={r17}"() + %t6 = call i32 asm sideeffect "addi.w $$t6, $$zero, 1", "={r18}"() + %t7 = call i32 asm sideeffect "addi.w $$t7, $$zero, 1", "={r19}"() + %t8 = call i32 asm sideeffect "addi.w $$t8, $$zero, 1", 
"={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i32 asm sideeffect "addi.w $$s9, $$zero, 1", "={r22}"() + %s0 = call i32 asm sideeffect "addi.w $$s0, $$zero, 1", "={r23}"() + %s1 = call i32 asm sideeffect "addi.w $$s1, $$zero, 1", "={r24}"() + %s2 = call i32 asm sideeffect "addi.w $$s2, $$zero, 1", "={r25}"() + %s3 = call i32 asm sideeffect "addi.w $$s3, $$zero, 1", "={r26}"() + %s4 = call i32 asm sideeffect "addi.w $$s4, $$zero, 1", "={r27}"() + %s5 = call i32 asm sideeffect "addi.w $$s5, $$zero, 1", "={r28}"() + %s6 = call i32 asm sideeffect "addi.w $$s6, $$zero, 1", "={r29}"() + %s7 = call i32 asm sideeffect "addi.w $$s7, $$zero, 1", "={r30}"() + %s8 = call i32 asm sideeffect "addi.w $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i32 %s7, %s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i32 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i32 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i32 %tp) + call void asm sideeffect "# reg use $0", "{r4}"(i32 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i32 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i32 %a2) + call void asm sideeffect "# reg use $0", "{r7}"(i32 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i32 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i32 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i32 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i32 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i32 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i32 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i32 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i32 %t6) + call void asm sideeffect 
"# reg use $0", "{r19}"(i32 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i32 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i32 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i32 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i32 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i32 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i32 %s3) + call void asm sideeffect "# reg use $0", "{r27}"(i32 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i32 %s5) + call void asm sideeffect "# reg use $0", "{r29}"(i32 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i32 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i32 %s8) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll new file mode 100644 index 0000000000000000000000000000000000000000..93320e165583fd998df971684702c1b24c283c12 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s0, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s1, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s2, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s3, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s4, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s5, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s6, $sp, 24 # 8-byte Folded Spill 
+; CHECK-NEXT: st.d $s7, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s8, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: .cfi_offset 23, -24 +; CHECK-NEXT: .cfi_offset 24, -32 +; CHECK-NEXT: .cfi_offset 25, -40 +; CHECK-NEXT: .cfi_offset 26, -48 +; CHECK-NEXT: .cfi_offset 27, -56 +; CHECK-NEXT: .cfi_offset 28, -64 +; CHECK-NEXT: .cfi_offset 29, -72 +; CHECK-NEXT: .cfi_offset 30, -80 +; CHECK-NEXT: .cfi_offset 31, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $ra, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; 
CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $fp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.d $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.d $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: 
# reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.d $s8, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s7, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s6, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s5, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s4, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s3, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s2, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s1, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s0, $sp, 72 # 8-byte 
Folded Reload +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret + %zero = call i64 asm sideeffect "addi.d $$zero, $$zero, 1", "={r0}"() + %ra = call i64 asm sideeffect "addi.d $$ra, $$zero, 1", "={r1}"() + %tp = call i64 asm sideeffect "addi.d $$tp, $$zero, 1", "={r2}"() + %a0 = call i64 asm sideeffect "addi.d $$a0, $$zero, 1", "={r4}"() + %a1 = call i64 asm sideeffect "addi.d $$a1, $$zero, 1", "={r5}"() + %a2 = call i64 asm sideeffect "addi.d $$a2, $$zero, 1", "={r6}"() + %a3 = call i64 asm sideeffect "addi.d $$a3, $$zero, 1", "={r7}"() + %a4 = call i64 asm sideeffect "addi.d $$a4, $$zero, 1", "={r8}"() + %a5 = call i64 asm sideeffect "addi.d $$a5, $$zero, 1", "={r9}"() + %a6 = call i64 asm sideeffect "addi.d $$a6, $$zero, 1", "={r10}"() + %a7 = call i64 asm sideeffect "addi.d $$a7, $$zero, 1", "={r11}"() + %t0 = call i64 asm sideeffect "addi.d $$t0, $$zero, 1", "={r12}"() + %t1 = call i64 asm sideeffect "addi.d $$t1, $$zero, 1", "={r13}"() + %t2 = call i64 asm sideeffect "addi.d $$t2, $$zero, 1", "={r14}"() + %t3 = call i64 asm sideeffect "addi.d $$t3, $$zero, 1", "={r15}"() + %t4 = call i64 asm sideeffect "addi.d $$t4, $$zero, 1", "={r16}"() + %t5 = call i64 asm sideeffect "addi.d $$t5, $$zero, 1", "={r17}"() + %t6 = call i64 asm sideeffect "addi.d $$t6, $$zero, 1", "={r18}"() + %t7 = call i64 asm sideeffect "addi.d $$t7, $$zero, 1", "={r19}"() + %t8 = call i64 asm sideeffect "addi.d $$t8, $$zero, 1", "={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i64 asm sideeffect "addi.d $$s9, $$zero, 1", "={r22}"() + %s0 = call i64 asm sideeffect "addi.d $$s0, $$zero, 1", "={r23}"() + %s1 = call i64 asm sideeffect "addi.d $$s1, $$zero, 1", "={r24}"() + %s2 = call i64 asm sideeffect "addi.d $$s2, $$zero, 1", "={r25}"() + %s3 = call i64 asm sideeffect "addi.d $$s3, $$zero, 1", "={r26}"() + %s4 = call i64 asm sideeffect "addi.d $$s4, $$zero, 1", 
"={r27}"() + %s5 = call i64 asm sideeffect "addi.d $$s5, $$zero, 1", "={r28}"() + %s6 = call i64 asm sideeffect "addi.d $$s6, $$zero, 1", "={r29}"() + %s7 = call i64 asm sideeffect "addi.d $$s7, $$zero, 1", "={r30}"() + %s8 = call i64 asm sideeffect "addi.d $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i64 %s7, %s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i64 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i64 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i64 %tp) + call void asm sideeffect "# reg use $0", "{r4}"(i64 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i64 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i64 %a2) + call void asm sideeffect "# reg use $0", "{r7}"(i64 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i64 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i64 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i64 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i64 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i64 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i64 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i64 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i64 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i64 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i64 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i64 %t6) + call void asm sideeffect "# reg use $0", "{r19}"(i64 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i64 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i64 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i64 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i64 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i64 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i64 %s3) + call void asm 
sideeffect "# reg use $0", "{r27}"(i64 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i64 %s5) + call void asm sideeffect "# reg use $0", "{r29}"(i64 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i64 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i64 %s8) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll new file mode 100644 index 0000000000000000000000000000000000000000..7d064ddcf3105d87d436f9295eaa0156b4353a65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll @@ -0,0 +1,140 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch64 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64 + +define i32 @relax_b18(i32 signext %a, i32 signext %b) { +; LA32-LABEL: relax_b18: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $a1, .LBB0_1 +; LA32-NEXT: b .LBB0_2 +; LA32-NEXT: .LBB0_1: # %iftrue +; LA32-NEXT: #APP +; LA32-NEXT: .space 1048576 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_2: # %iffalse +; LA32-NEXT: move $a0, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: relax_b18: +; LA64: # %bb.0: +; LA64-NEXT: beq $a0, $a1, .LBB0_1 +; LA64-NEXT: b .LBB0_2 +; LA64-NEXT: .LBB0_1: # %iftrue +; LA64-NEXT: #APP +; LA64-NEXT: .space 1048576 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_2: # %iffalse +; LA64-NEXT: move $a0, $zero +; LA64-NEXT: ret + %cond = icmp eq i32 %a, %b + br i1 %cond, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 1048576", ""() + ret i32 1 + +iffalse: + ret i32 
0 +} + +define i32 @relax_b23(i1 %a) { +; LA32-LABEL: relax_b23: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: bnez $a0, .LBB1_1 +; LA32-NEXT: b .LBB1_2 +; LA32-NEXT: .LBB1_1: # %iftrue +; LA32-NEXT: #APP +; LA32-NEXT: .space 16777216 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: ret +; LA32-NEXT: .LBB1_2: # %iffalse +; LA32-NEXT: move $a0, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: relax_b23: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1 +; LA64-NEXT: bnez $a0, .LBB1_1 +; LA64-NEXT: b .LBB1_2 +; LA64-NEXT: .LBB1_1: # %iftrue +; LA64-NEXT: #APP +; LA64-NEXT: .space 16777216 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: ret +; LA64-NEXT: .LBB1_2: # %iffalse +; LA64-NEXT: move $a0, $zero +; LA64-NEXT: ret + br i1 %a, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 16777216", ""() + ret i32 1 + +iffalse: + ret i32 0 +} + +define i32 @relax_b28(i1 %a) { +; LA32-LABEL: relax_b28: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: bnez $a0, .LBB2_1 +; LA32-NEXT: # %bb.3: +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LBB2_2) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.LBB2_2) +; LA32-NEXT: jr $a0 +; LA32-NEXT: .LBB2_1: # %iftrue +; LA32-NEXT: #APP +; LA32-NEXT: .space 536870912 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB2_2: # %iffalse +; LA32-NEXT: move $a0, $zero +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: relax_b28: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: andi $a0, $a0, 1 +; LA64-NEXT: bnez $a0, .LBB2_1 +; LA64-NEXT: # %bb.3: +; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LBB2_2) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(.LBB2_2) +; LA64-NEXT: jr $a0 +; LA64-NEXT: .LBB2_1: # %iftrue +; LA64-NEXT: #APP +; LA64-NEXT: .space 536870912 +; LA64-NEXT: 
#NO_APP +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB2_2: # %iffalse +; LA64-NEXT: move $a0, $zero +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + br i1 %a, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret i32 1 + +iffalse: + ret i32 0 +} diff --git a/llvm/test/CodeGen/LoongArch/bstrins_d.ll b/llvm/test/CodeGen/LoongArch/bstrins_d.ll index 342e044c7a7be05176a7889133cfacaf5cc60b77..fe1f6270f966d3197f55b172b6fee9729f8d2b62 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_d.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_d.ll @@ -13,7 +13,7 @@ define i64 @pat1(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat1: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff %shl = shl i64 %b, 16 %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 @@ -25,7 +25,7 @@ define i64 @pat1_swap(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat1_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff %shl = shl i64 %b, 16 %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 @@ -41,7 +41,7 @@ define i64 @pat2(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat2: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff %shl = shl i64 %and2, 16 @@ -53,7 +53,7 @@ define i64 @pat2_swap(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat2_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff %shl = shl i64 %and2, 16 @@ -71,7 +71,7 @@ define i64 
@pat3(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: andi $a1, $a1, 288 ; CHECK-NEXT: srli.d $a1, $a1, 4 ; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f %and2 = and i64 %b, 288 ; 0x0000000000000120 %or = or i64 %and1, %and2 @@ -84,7 +84,7 @@ define i64 @pat3_swap(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: andi $a1, $a1, 288 ; CHECK-NEXT: srli.d $a1, $a1, 4 ; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f %and2 = and i64 %b, 288 ; 0x0000000000000120 %or = or i64 %and2, %and1 @@ -99,7 +99,7 @@ define i64 @pat4(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat4: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 255 %shl = shl i64 %b, 8 %or = or i64 %and, %shl @@ -110,7 +110,7 @@ define i64 @pat4_swap(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: pat4_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 255 %shl = shl i64 %b, 8 %or = or i64 %shl, %and @@ -127,7 +127,7 @@ define i64 @pat5(i64 %a) nounwind { ; CHECK-NEXT: lu12i.w $a1, 74565 ; CHECK-NEXT: ori $a1, $a1, 1656 ; CHECK-NEXT: bstrins.d $a0, $a1, 47, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff %or = or i64 %and, 20015998304256 ; 0x0000123456780000 ret i64 %or @@ -146,7 +146,7 @@ define i64 @pat6(i64 %c) nounwind { ; CHECK-NEXT: lu52i.d $a1, $a1, 291 ; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16 ; CHECK-NEXT: move $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %c, 16777215 ; 0x0000000000ffffff %shl = shl i64 %and, 16 %or = or i64 %shl, 1311767949471676570 ; 0x123456000000789a @@ -164,7 +164,7 @@ define i64 @pat7(i64 %c) nounwind { ; CHECK-NEXT: lu52i.d $a1, $a1, 291 ; CHECK-NEXT: bstrins.d 
$a1, $a0, 39, 16 ; CHECK-NEXT: move $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shl = shl i64 %c, 16 %and = and i64 %shl, 1099511562240 ; 0x000000ffffff0000 %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a @@ -182,7 +182,7 @@ define i64 @pat8(i64 %c) nounwind { ; CHECK-NEXT: lu32i.d $a0, 284160 ; CHECK-NEXT: lu52i.d $a0, $a0, 291 ; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %c, 1099511562240 ; 0x000000ffffff0000 %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a ret i64 %or @@ -200,7 +200,7 @@ define i64 @no_bstrins_d(i64 %a) nounwind { ; CHECK-NEXT: ori $a1, $a1, 4095 ; CHECK-NEXT: lu32i.d $a1, -60876 ; CHECK-NEXT: and $a0, $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff %or = or i64 %and, 20015998341120 ; 0x0000123456789000 ret i64 %or diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll index 47c4d826c2ee5e2cb34f53f1b297eab24e74373c..dfbe000841cdcbdf5481ddce19a2d6a6987dfcb6 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll @@ -13,7 +13,7 @@ define i32 @pat1(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat1: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, -1048321 ; 0xfff000ff %shl = shl i32 %b, 8 %and2 = and i32 %shl, 1048320 ; 0x000fff00 @@ -25,7 +25,7 @@ define i32 @pat1_swap(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat1_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, -1048321 ; 0xfff000ff %shl = shl i32 %b, 8 %and2 = and i32 %shl, 1048320 ; 0x000fff00 @@ -41,7 +41,7 @@ define i32 @pat2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat2: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 
+; CHECK-NEXT: ret %and1 = and i32 %a, -1048321 ; 0xfff000ff %and2 = and i32 %b, 4095 ; 0x00000fff %shl = shl i32 %and2, 8 @@ -53,7 +53,7 @@ define i32 @pat2_swap(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat2_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, -1048321 ; 0xfff000ff %and2 = and i32 %b, 4095 ; 0x00000fff %shl = shl i32 %and2, 8 @@ -71,7 +71,7 @@ define i32 @pat3(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: andi $a1, $a1, 288 ; CHECK-NEXT: srli.w $a1, $a1, 4 ; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, -4081 ; 0xfffff00f %and2 = and i32 %b, 288 ; 0x00000120 %or = or i32 %and1, %and2 @@ -84,7 +84,7 @@ define i32 @pat3_swap(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: andi $a1, $a1, 288 ; CHECK-NEXT: srli.w $a1, $a1, 4 ; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, -4081 ; 0xfffff00f %and2 = and i32 %b, 288 ; 0x00000120 %or = or i32 %and2, %and1 @@ -96,7 +96,7 @@ define i32 @pat3_positive_mask0(i32 %a, i32 %b) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: srli.w $a1, $a1, 28 ; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and1 = and i32 %a, 268435455 ; 0x0fffffff %and2 = and i32 %b, 4026531840 ; 0xf0000000 %or = or i32 %and1, %and2 @@ -111,7 +111,7 @@ define i32 @pat4(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat4: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 268435455 ; 0x0fffffff %shl = shl i32 %b, 28 %or = or i32 %and, %shl @@ -122,7 +122,7 @@ define i32 @pat4_swap(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: pat4_swap: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 268435455 ; 0x0fffffff %shl = shl i32 %b, 28 %or = or i32 %shl, %and 
@@ -139,7 +139,7 @@ define i32 @pat5(i32 %a) nounwind { ; CHECK-NEXT: lu12i.w $a1, 1 ; CHECK-NEXT: ori $a1, $a1, 564 ; CHECK-NEXT: bstrins.w $a0, $a1, 23, 8 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 4278190335 ; 0xff0000ff %or = or i32 %and, 1192960 ; 0x00123400 ret i32 %or @@ -156,7 +156,7 @@ define i32 @pat6(i32 %c) nounwind { ; CHECK-NEXT: ori $a1, $a1, 2 ; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4 ; CHECK-NEXT: move $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %c, 16777215 ; 0x00ffffff %shl = shl i32 %and, 4 %or = or i32 %shl, 268435458 ; 0x10000002 @@ -172,7 +172,7 @@ define i32 @pat7(i32 %c) nounwind { ; CHECK-NEXT: ori $a1, $a1, 2 ; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4 ; CHECK-NEXT: move $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shl = shl i32 %c, 4 %and = and i32 %shl, 268435440 ; 0x0ffffff0 %or = or i32 %and, 268435458 ; 0x10000002 @@ -188,7 +188,7 @@ define i32 @pat8(i32 %c) nounwind { ; CHECK-NEXT: lu12i.w $a0, 65536 ; CHECK-NEXT: ori $a0, $a0, 2 ; CHECK-NEXT: bstrins.w $a0, $a1, 27, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %c, 268435440 ; 0x0ffffff0 %or = or i32 %and, 268435458 ; 0x10000002 ret i32 %or @@ -205,7 +205,7 @@ define i32 @no_bstrins_w(i32 %a) nounwind { ; CHECK-NEXT: lu12i.w $a1, -3805 ; CHECK-NEXT: ori $a1, $a1, 1279 ; CHECK-NEXT: and $a0, $a0, $a1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 4278190335 ; 0xff0000ff %or = or i32 %and, 1193040 ; 0x00123450 ret i32 %or diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll index 51d4967dc3f51407fff9efcc8c15ed3fc51a2f1a..e93c1391d463f04a5d0cd05bf7e9d5116a5f20fc 100644 --- a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll +++ b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll @@ -1,10 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s define 
i64 @lshr40_and255(i64 %a) { ; CHECK-LABEL: lshr40_and255: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 47, 40 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shr = lshr i64 %a, 40 %and = and i64 %shr, 255 ret i64 %and @@ -14,7 +15,7 @@ define i64 @ashr50_and511(i64 %a) { ; CHECK-LABEL: ashr50_and511: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 58, 50 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shr = ashr i64 %a, 50 %and = and i64 %shr, 511 ret i64 %and @@ -24,7 +25,7 @@ define i64 @zext_i32_to_i64(i32 %a) { ; CHECK-LABEL: zext_i32_to_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %res = zext i32 %a to i64 ret i64 %res } @@ -33,7 +34,7 @@ define i64 @and8191(i64 %a) { ; CHECK-LABEL: and8191: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 12, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 8191 ret i64 %and } @@ -43,7 +44,7 @@ define i64 @and4095(i64 %a) { ; CHECK-LABEL: and4095: ; CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 4095 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 4095 ret i64 %and } @@ -53,7 +54,7 @@ define i64 @and0xff0_lshr4(i64 %a) { ; CHECK-LABEL: and0xff0_lshr4: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 4080 %shr = lshr i64 %and, 4 ret i64 %shr @@ -66,7 +67,7 @@ define i64 @and4080_ashr5(i64 %a) { ; CHECK-LABEL: and4080_ashr5: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 5 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 4080 %shr = ashr i64 %and, 5 ret i64 %shr @@ -78,7 +79,7 @@ define i64 @and0xf30_lshr4(i64 %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 3888 ; CHECK-NEXT: srli.d $a0, $a0, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 3888 %shr = lshr i64 %and, 4 ret i64 %shr @@ -90,7 +91,7 @@ define i64 @and0xff0_lshr3(i64 %a) { ; 
CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 4080 ; CHECK-NEXT: srli.d $a0, $a0, 3 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i64 %a, 4080 %shr = lshr i64 %and, 3 ret i64 %shr diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll index 92d79019a7e3a9184c7188a1f6d7712400b72d33..f9027e1fb32df35d903d8112e084027ae3f55852 100644 --- a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll @@ -1,10 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s define i32 @lshr10_and255(i32 %a) { ; CHECK-LABEL: lshr10_and255: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 17, 10 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shr = lshr i32 %a, 10 %and = and i32 %shr, 255 ret i32 %and @@ -14,7 +15,7 @@ define i32 @ashr20_and511(i32 %a) { ; CHECK-LABEL: ashr20_and511: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 28, 20 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %shr = ashr i32 %a, 20 %and = and i32 %shr, 511 ret i32 %and @@ -24,7 +25,7 @@ define i32 @zext_i16_to_i32(i16 %a) { ; CHECK-LABEL: zext_i16_to_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 15, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %res = zext i16 %a to i32 ret i32 %res } @@ -33,7 +34,7 @@ define i32 @and8191(i32 %a) { ; CHECK-LABEL: and8191: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 12, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 8191 ret i32 %and } @@ -43,7 +44,7 @@ define i32 @and4095(i32 %a) { ; CHECK-LABEL: and4095: ; CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 4095 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 4095 ret i32 %and } @@ -53,7 +54,7 @@ define i32 @and0xff0_lshr4(i32 %a) { ; CHECK-LABEL: and0xff0_lshr4: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; 
CHECK-NEXT: ret %and = and i32 %a, 4080 %shr = lshr i32 %and, 4 ret i32 %shr @@ -66,7 +67,7 @@ define i32 @and4080_ashr5(i32 %a) { ; CHECK-LABEL: and4080_ashr5: ; CHECK: # %bb.0: ; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 5 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 4080 %shr = ashr i32 %and, 5 ret i32 %shr @@ -78,7 +79,7 @@ define i32 @and0xf30_lshr4(i32 %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 3888 ; CHECK-NEXT: srli.w $a0, $a0, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 3888 %shr = lshr i32 %and, 4 ret i32 %shr @@ -90,7 +91,7 @@ define i32 @and0xff0_lshr3(i32 %a) { ; CHECK: # %bb.0: ; CHECK-NEXT: andi $a0, $a0, 4080 ; CHECK-NEXT: srli.w $a0, $a0, 3 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %and = and i32 %a, 4080 %shr = lshr i32 %and, 3 ret i32 %shr diff --git a/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll new file mode 100644 index 0000000000000000000000000000000000000000..c99adfbb0574fe36fef1a49496476170c49702ce --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bswap-bitreverse.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare i16 @llvm.bitreverse.i16(i16) +declare i32 @llvm.bitreverse.i32(i32) +declare i64 @llvm.bitreverse.i64(i64) +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind { +; LA32-LABEL: test_bswap_bitreverse_i16: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: srli.w $a0, $a0, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_bitreverse_i16: +; LA64: # %bb.0: +; LA64-NEXT: revb.2h $a0, $a0 +; 
LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 48 +; LA64-NEXT: ret + %tmp = call i16 @llvm.bswap.i16(i16 %a) + %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind { +; LA32-LABEL: test_bswap_bitreverse_i32: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.4b $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_bitreverse_i32: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.4b $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.bswap.i32(i32 %a) + %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp) + ret i32 %tmp2 +} + +define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { +; LA32-LABEL: test_bswap_bitreverse_i64: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.4b $a0, $a0 +; LA32-NEXT: bitrev.4b $a1, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_bitreverse_i64: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.8b $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp) + ret i64 %tmp2 +} + +define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { +; LA32-LABEL: test_bitreverse_bswap_i16: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: bitrev.w $a0, $a0 +; LA32-NEXT: srli.w $a0, $a0, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_bswap_i16: +; LA64: # %bb.0: +; LA64-NEXT: revb.2h $a0, $a0 +; LA64-NEXT: bitrev.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 48 +; LA64-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind { +; LA32-LABEL: test_bitreverse_bswap_i32: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.4b $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_bswap_i32: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.4b $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp) + ret i32 %tmp2 +} + +define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind { +; 
LA32-LABEL: test_bitreverse_bswap_i64: +; LA32: # %bb.0: +; LA32-NEXT: bitrev.4b $a0, $a0 +; LA32-NEXT: bitrev.4b $a1, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bitreverse_bswap_i64: +; LA64: # %bb.0: +; LA64-NEXT: bitrev.8b $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp) + ret i64 %tmp2 +} + +define i32 @pr55484(i32 %0) { +; LA32-LABEL: pr55484: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 8 +; LA32-NEXT: srli.w $a0, $a0, 8 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: ext.w.h $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: pr55484: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 8 +; LA64-NEXT: srli.d $a0, $a0, 8 +; LA64-NEXT: or $a0, $a0, $a1 +; LA64-NEXT: ext.w.h $a0, $a0 +; LA64-NEXT: ret + %2 = lshr i32 %0, 8 + %3 = shl i32 %0, 8 + %4 = or i32 %2, %3 + %5 = trunc i32 %4 to i16 + %6 = sext i16 %5 to i32 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll new file mode 100644 index 0000000000000000000000000000000000000000..1ef73b4f1c0bfdb2cb2fa3a8a3d2ad322893312f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bswap.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i48 @llvm.bswap.i48(i48) +declare i64 @llvm.bswap.i64(i64) +declare i80 @llvm.bswap.i80(i80) +declare i128 @llvm.bswap.i128(i128) + +define i16 @test_bswap_i16(i16 %a) nounwind { +; LA32-LABEL: test_bswap_i16: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i16: +; LA64: # %bb.0: +; LA64-NEXT: revb.2h $a0, $a0 +; LA64-NEXT: ret + %tmp = call i16 @llvm.bswap.i16(i16 %a) + ret i16 %tmp +} + 
+define i32 @test_bswap_i32(i32 %a) nounwind { +; LA32-LABEL: test_bswap_i32: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: rotri.w $a0, $a0, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i32: +; LA64: # %bb.0: +; LA64-NEXT: revb.2w $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %tmp +} + +define i64 @test_bswap_i64(i64 %a) nounwind { +; LA32-LABEL: test_bswap_i64: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a1, $a1 +; LA32-NEXT: rotri.w $a2, $a1, 16 +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: rotri.w $a1, $a0, 16 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i64: +; LA64: # %bb.0: +; LA64-NEXT: revb.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %tmp +} + +;; Bswap on non-native integer widths. + +define i48 @test_bswap_i48(i48 %a) nounwind { +; LA32-LABEL: test_bswap_i48: +; LA32: # %bb.0: +; LA32-NEXT: revb.2h $a1, $a1 +; LA32-NEXT: rotri.w $a1, $a1, 16 +; LA32-NEXT: srli.w $a1, $a1, 16 +; LA32-NEXT: revb.2h $a0, $a0 +; LA32-NEXT: rotri.w $a2, $a0, 16 +; LA32-NEXT: slli.w $a0, $a2, 16 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: srli.w $a1, $a2, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i48: +; LA64: # %bb.0: +; LA64-NEXT: revb.d $a0, $a0 +; LA64-NEXT: srli.d $a0, $a0, 16 +; LA64-NEXT: ret + %tmp = call i48 @llvm.bswap.i48(i48 %a) + ret i48 %tmp +} + +define i80 @test_bswap_i80(i80 %a) nounwind { +; LA32-LABEL: test_bswap_i80: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: revb.2h $a2, $a2 +; LA32-NEXT: rotri.w $a2, $a2, 16 +; LA32-NEXT: ld.w $a3, $a1, 4 +; LA32-NEXT: revb.2h $a3, $a3 +; LA32-NEXT: rotri.w $a3, $a3, 16 +; LA32-NEXT: srli.w $a4, $a3, 16 +; LA32-NEXT: slli.w $a5, $a2, 16 +; LA32-NEXT: or $a4, $a5, $a4 +; LA32-NEXT: srli.w $a2, $a2, 16 +; LA32-NEXT: st.h $a2, $a0, 8 +; LA32-NEXT: st.w $a4, $a0, 4 +; LA32-NEXT: slli.w $a2, $a3, 16 +; LA32-NEXT: ld.w $a1, $a1, 8 +; LA32-NEXT: revb.2h $a1, $a1 +; LA32-NEXT: rotri.w 
$a1, $a1, 16 +; LA32-NEXT: srli.w $a1, $a1, 16 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i80: +; LA64: # %bb.0: +; LA64-NEXT: revb.d $a1, $a1 +; LA64-NEXT: srli.d $a1, $a1, 48 +; LA64-NEXT: revb.d $a2, $a0 +; LA64-NEXT: slli.d $a0, $a2, 16 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: srli.d $a1, $a2, 48 +; LA64-NEXT: ret + %tmp = call i80 @llvm.bswap.i80(i80 %a) + ret i80 %tmp +} + +define i128 @test_bswap_i128(i128 %a) nounwind { +; LA32-LABEL: test_bswap_i128: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: revb.2h $a2, $a2 +; LA32-NEXT: rotri.w $a2, $a2, 16 +; LA32-NEXT: st.w $a2, $a0, 12 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: revb.2h $a2, $a2 +; LA32-NEXT: rotri.w $a2, $a2, 16 +; LA32-NEXT: st.w $a2, $a0, 8 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: revb.2h $a2, $a2 +; LA32-NEXT: rotri.w $a2, $a2, 16 +; LA32-NEXT: st.w $a2, $a0, 4 +; LA32-NEXT: ld.w $a1, $a1, 12 +; LA32-NEXT: revb.2h $a1, $a1 +; LA32-NEXT: rotri.w $a1, $a1, 16 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_bswap_i128: +; LA64: # %bb.0: +; LA64-NEXT: revb.d $a2, $a1 +; LA64-NEXT: revb.d $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %tmp = call i128 @llvm.bswap.i128(i128 %a) + ret i128 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll new file mode 100644 index 0000000000000000000000000000000000000000..08fff9f8ceedd404eb53b50ca93bf8b830a92b90 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll @@ -0,0 +1,403 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s | FileCheck %s + +;; This file contains tests that should have identical output for all ABIs, i.e. 
+;; where no arguments are passed via floating point registers. + +;; Check that on LA64, i128 is passed in a pair of GPRs. +define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { +; CHECK-LABEL: callee_i128_in_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %b_trunc = trunc i128 %b to i64 + %1 = add i64 %a, %b_trunc + ret i64 %1 +} + +define i64 @caller_i128_in_regs() nounwind { +; CHECK-LABEL: caller_i128_in_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: ori $a1, $zero, 2 +; CHECK-NEXT: move $a2, $zero +; CHECK-NEXT: bl %plt(callee_i128_in_regs) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) + ret i64 %1 +} + +;; Check that the stack is used once the GPRs are exhausted. +define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { +; CHECK-LABEL: callee_many_scalars: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $t0, $sp, 0 +; CHECK-NEXT: xor $a5, $a5, $t0 +; CHECK-NEXT: xor $a4, $a4, $a7 +; CHECK-NEXT: or $a4, $a4, $a5 +; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: add.d $a0, $a0, $a3 +; CHECK-NEXT: sltui $a1, $a4, 1 +; CHECK-NEXT: add.d $a0, $a1, $a0 +; CHECK-NEXT: add.d $a0, $a0, $a6 +; CHECK-NEXT: ld.d $a1, $sp, 8 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %a_ext = zext i8 %a to i64 + %b_ext = zext i16 %b to i64 + %c_ext = zext i32 %c to i64 + %1 = add i64 %a_ext, %b_ext + %2 = add i64 %1, %c_ext + %3 = add i64 %2, %d + %4 = icmp eq i128 %e, %g + %5 = zext i1 %4 to i64 + %6 = add i64 %5, %3 + %7 = add i64 %6, %f + %8 = add i64 %7, %h + ret i64 %8 +} + +define i64 @caller_many_scalars() 
nounwind { +; CHECK-LABEL: caller_many_scalars: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 8 +; CHECK-NEXT: st.d $a0, $sp, 8 +; CHECK-NEXT: st.d $zero, $sp, 0 +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: ori $a1, $zero, 2 +; CHECK-NEXT: ori $a2, $zero, 3 +; CHECK-NEXT: ori $a3, $zero, 4 +; CHECK-NEXT: ori $a4, $zero, 5 +; CHECK-NEXT: ori $a6, $zero, 6 +; CHECK-NEXT: ori $a7, $zero, 7 +; CHECK-NEXT: move $a5, $zero +; CHECK-NEXT: bl %plt(callee_many_scalars) +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret + %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) + ret i64 %1 +} + +;; Check that i256 is passed indirectly. + +define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { +; CHECK-LABEL: callee_large_scalars: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a2, $a1, 24 +; CHECK-NEXT: ld.d $a3, $a0, 24 +; CHECK-NEXT: xor $a2, $a3, $a2 +; CHECK-NEXT: ld.d $a3, $a1, 8 +; CHECK-NEXT: ld.d $a4, $a0, 8 +; CHECK-NEXT: xor $a3, $a4, $a3 +; CHECK-NEXT: or $a2, $a3, $a2 +; CHECK-NEXT: ld.d $a3, $a1, 16 +; CHECK-NEXT: ld.d $a4, $a0, 16 +; CHECK-NEXT: xor $a3, $a4, $a3 +; CHECK-NEXT: ld.d $a1, $a1, 0 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: xor $a0, $a0, $a1 +; CHECK-NEXT: or $a0, $a0, $a3 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: sltui $a0, $a0, 1 +; CHECK-NEXT: ret + %1 = icmp eq i256 %a, %b + %2 = zext i1 %1 to i64 + ret i64 %2 +} + +define i64 @caller_large_scalars() nounwind { +; CHECK-LABEL: caller_large_scalars: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: st.d $zero, $sp, 24 +; CHECK-NEXT: st.d $zero, $sp, 16 +; CHECK-NEXT: st.d $zero, $sp, 8 +; CHECK-NEXT: st.d $zero, $sp, 56 +; CHECK-NEXT: st.d $zero, $sp, 48 +; 
CHECK-NEXT: st.d $zero, $sp, 40 +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: st.d $a0, $sp, 32 +; CHECK-NEXT: addi.d $a0, $sp, 32 +; CHECK-NEXT: addi.d $a1, $sp, 0 +; CHECK-NEXT: bl %plt(callee_large_scalars) +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret + %1 = call i64 @callee_large_scalars(i256 1, i256 2) + ret i64 %1 +} + +;; Check that arguments larger than 2*GRLen are handled correctly when their +;; address is passed on the stack rather than in memory. + +;; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { +; CHECK-LABEL: callee_large_scalars_exhausted_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a0, $sp, 8 +; CHECK-NEXT: ld.d $a1, $a0, 24 +; CHECK-NEXT: ld.d $a2, $a7, 24 +; CHECK-NEXT: xor $a1, $a2, $a1 +; CHECK-NEXT: ld.d $a2, $a0, 8 +; CHECK-NEXT: ld.d $a3, $a7, 8 +; CHECK-NEXT: xor $a2, $a3, $a2 +; CHECK-NEXT: or $a1, $a2, $a1 +; CHECK-NEXT: ld.d $a2, $a0, 16 +; CHECK-NEXT: ld.d $a3, $a7, 16 +; CHECK-NEXT: xor $a2, $a3, $a2 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: ld.d $a3, $a7, 0 +; CHECK-NEXT: xor $a0, $a3, $a0 +; CHECK-NEXT: or $a0, $a0, $a2 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: sltui $a0, $a0, 1 +; CHECK-NEXT: ret + %1 = icmp eq i256 %h, %j + %2 = zext i1 %1 to i64 + ret i64 %2 +} + +define i64 @caller_large_scalars_exhausted_regs() nounwind { +; CHECK-LABEL: caller_large_scalars_exhausted_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $a0, $sp, 16 +; CHECK-NEXT: st.d $a0, $sp, 8 +; CHECK-NEXT: ori $a0, $zero, 9 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: ori $a0, $zero, 10 +; CHECK-NEXT: st.d $a0, $sp, 16 +; CHECK-NEXT: st.d $zero, $sp, 40 +; CHECK-NEXT: st.d $zero, $sp, 32 +; CHECK-NEXT: st.d $zero, $sp, 
24 +; CHECK-NEXT: st.d $zero, $sp, 72 +; CHECK-NEXT: st.d $zero, $sp, 64 +; CHECK-NEXT: st.d $zero, $sp, 56 +; CHECK-NEXT: ori $a0, $zero, 8 +; CHECK-NEXT: st.d $a0, $sp, 48 +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: ori $a1, $zero, 2 +; CHECK-NEXT: ori $a2, $zero, 3 +; CHECK-NEXT: ori $a3, $zero, 4 +; CHECK-NEXT: ori $a4, $zero, 5 +; CHECK-NEXT: ori $a5, $zero, 6 +; CHECK-NEXT: ori $a6, $zero, 7 +; CHECK-NEXT: addi.d $a7, $sp, 48 +; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret + %1 = call i64 @callee_large_scalars_exhausted_regs( + i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, + i256 10) + ret i64 %1 +} + +;; Check large struct arguments, which are passed byval + +%struct.large = type { i64, i64, i64, i64 } + +define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { +; CHECK-LABEL: callee_large_struct: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a1, $a0, 24 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 + %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 + %3 = load i64, ptr %1 + %4 = load i64, ptr %2 + %5 = add i64 %3, %4 + ret i64 %5 +} + +define i64 @caller_large_struct() nounwind { +; CHECK-LABEL: caller_large_struct: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: st.d $a0, $sp, 40 +; CHECK-NEXT: st.d $a0, $sp, 8 +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: st.d $a0, $sp, 48 +; CHECK-NEXT: st.d $a0, $sp, 16 +; CHECK-NEXT: ori $a0, $zero, 3 +; CHECK-NEXT: st.d $a0, $sp, 56 +; CHECK-NEXT: st.d $a0, $sp, 24 +; CHECK-NEXT: ori $a0, $zero, 4 +; CHECK-NEXT: st.d $a0, $sp, 64 +; CHECK-NEXT: st.d $a0, $sp, 32 +; CHECK-NEXT: addi.d $a0, $sp, 8 +; CHECK-NEXT: bl 
%plt(callee_large_struct) +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret + %ls = alloca %struct.large, align 8 + %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 + store i64 1, ptr %a + %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 + store i64 2, ptr %b + %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 + store i64 3, ptr %c + %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 + store i64 4, ptr %d + %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) + ret i64 %1 +} + +;; Check return scalar which size is 2*GRLen. + +define i128 @callee_small_scalar_ret() nounwind { +; CHECK-LABEL: callee_small_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a0, $zero, -1 +; CHECK-NEXT: move $a1, $a0 +; CHECK-NEXT: ret + ret i128 -1 +} + +define i64 @caller_small_scalar_ret() nounwind { +; CHECK-LABEL: caller_small_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_small_scalar_ret) +; CHECK-NEXT: addi.w $a2, $zero, -2 +; CHECK-NEXT: xor $a0, $a0, $a2 +; CHECK-NEXT: orn $a0, $a0, $a1 +; CHECK-NEXT: sltui $a0, $a0, 1 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i128 @callee_small_scalar_ret() + %2 = icmp eq i128 -2, %1 + %3 = zext i1 %2 to i64 + ret i64 %3 +} + +;; Check return struct which size is 2*GRLen. 
+ +%struct.small = type { i64, ptr } + +define %struct.small @callee_small_struct_ret() nounwind { +; CHECK-LABEL: callee_small_struct_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: move $a1, $zero +; CHECK-NEXT: ret + ret %struct.small { i64 1, ptr null } +} + +define i64 @caller_small_struct_ret() nounwind { +; CHECK-LABEL: caller_small_struct_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_small_struct_ret) +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call %struct.small @callee_small_struct_ret() + %2 = extractvalue %struct.small %1, 0 + %3 = extractvalue %struct.small %1, 1 + %4 = ptrtoint ptr %3 to i64 + %5 = add i64 %2, %4 + ret i64 %5 +} + +;; Check return scalar which size is more than 2*GRLen. + +define i256 @callee_large_scalar_ret() nounwind { +; CHECK-LABEL: callee_large_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a1, $zero, -1 +; CHECK-NEXT: st.d $a1, $a0, 24 +; CHECK-NEXT: st.d $a1, $a0, 16 +; CHECK-NEXT: st.d $a1, $a0, 8 +; CHECK-NEXT: lu12i.w $a1, -30141 +; CHECK-NEXT: ori $a1, $a1, 747 +; CHECK-NEXT: st.d $a1, $a0, 0 +; CHECK-NEXT: ret + ret i256 -123456789 +} + +define void @caller_large_scalar_ret() nounwind { +; CHECK-LABEL: caller_large_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(callee_large_scalar_ret) +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret + %1 = call i256 @callee_large_scalar_ret() + ret void +} + +;; Check return struct which size is more than 2*GRLen. 
+ +define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { +; CHECK-LABEL: callee_large_struct_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $a1, $zero, 4 +; CHECK-NEXT: st.d $a1, $a0, 24 +; CHECK-NEXT: ori $a1, $zero, 3 +; CHECK-NEXT: st.d $a1, $a0, 16 +; CHECK-NEXT: ori $a1, $zero, 2 +; CHECK-NEXT: st.d $a1, $a0, 8 +; CHECK-NEXT: ori $a1, $zero, 1 +; CHECK-NEXT: st.d $a1, $a0, 0 +; CHECK-NEXT: ret + %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 + store i64 1, ptr %a, align 4 + %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 + store i64 2, ptr %b, align 4 + %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 + store i64 3, ptr %c, align 4 + %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 + store i64 4, ptr %d, align 4 + ret void +} + +define i64 @caller_large_struct_ret() nounwind { +; CHECK-LABEL: caller_large_struct_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $a0, $sp, 8 +; CHECK-NEXT: bl %plt(callee_large_struct_ret) +; CHECK-NEXT: ld.d $a0, $sp, 32 +; CHECK-NEXT: ld.d $a1, $sp, 8 +; CHECK-NEXT: add.d $a0, $a1, $a0 +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret + %1 = alloca %struct.large + call void @callee_large_struct_ret(ptr sret(%struct.large) %1) + %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 + %3 = load i64, ptr %2 + %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 + %5 = load i64, ptr %4 + %6 = add i64 %3, %5 + ret i64 %6 +} diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll new file mode 100644 index 0000000000000000000000000000000000000000..ceb38876c384a45edbc884aa90c2ceee24825ee0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll @@ -0,0 +1,128 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ +; RUN: | FileCheck %s + +;; This file contains specific tests for the lp64d ABI. + +;; Check pass floating-point arguments whith FPRs. + +define i64 @callee_float_in_fpr(i64 %a, float %b, double %c) nounwind { +; CHECK-LABEL: callee_float_in_fpr: +; CHECK: # %bb.0: +; CHECK-NEXT: ftintrz.l.s $fa0, $fa0 +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ftintrz.l.d $fa0, $fa1 +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %b_fptosi = fptosi float %b to i64 + %c_fptosi = fptosi double %c to i64 + %1 = add i64 %a, %b_fptosi + %2 = add i64 %1, %c_fptosi + ret i64 %2 +} + +define i64 @caller_float_in_fpr() nounwind { +; CHECK-LABEL: caller_float_in_fpr: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $zero +; CHECK-NEXT: movgr2fr.d $fa1, $zero +; CHECK-NEXT: bl %plt(callee_float_in_fpr) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i64 @callee_float_in_fpr(i64 1, float 0.0, double 0.0) + ret i64 %1 +} + +;; Check that the GPR is used once the FPRs are exhausted. + +;; Must keep define on a single line due to an update_llc_test_checks.py limitation. 
+define i64 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) nounwind { +; CHECK-LABEL: callee_double_in_gpr_exhausted_fprs: +; CHECK: # %bb.0: +; CHECK-NEXT: ftintrz.l.d $fa0, $fa7 +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: ftintrz.l.d $fa0, $fa0 +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: add.d $a0, $a1, $a0 +; CHECK-NEXT: ret + %h_fptosi = fptosi double %h to i64 + %i_fptosi = fptosi double %i to i64 + %1 = add i64 %h_fptosi, %i_fptosi + ret i64 %1 +} + +define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { +; CHECK-LABEL: caller_double_in_gpr_exhausted_fprs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: fld.d $fa1, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_1) +; CHECK-NEXT: fld.d $fa2, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_2) +; CHECK-NEXT: fld.d $fa3, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_3) +; CHECK-NEXT: fld.d $fa4, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_4) +; CHECK-NEXT: fld.d $fa5, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_5) +; CHECK-NEXT: fld.d $fa6, $a0, 0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_6) +; CHECK-NEXT: fld.d $fa7, $a0, 0 +; CHECK-NEXT: addi.d $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: ffint.d.l $fa0, $fa0 +; CHECK-NEXT: ori $a0, $zero, 0 +; CHECK-NEXT: lu32i.d $a0, 131072 +; CHECK-NEXT: lu52i.d $a0, $a0, 1026 +; CHECK-NEXT: bl 
%plt(callee_double_in_gpr_exhausted_fprs) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i64 @callee_double_in_gpr_exhausted_fprs( + double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, + double 7.0, double 8.0, double 9.0) + ret i64 %1 +} + +;; Check returning doubles. + +define double @callee_double_ret() nounwind { +; CHECK-LABEL: callee_double_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: ffint.d.l $fa0, $fa0 +; CHECK-NEXT: ret + ret double 1.0 +} + +define i64 @caller_double_ret() nounwind { +; CHECK-LABEL: caller_double_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_double_ret) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call double @callee_double_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll new file mode 100644 index 0000000000000000000000000000000000000000..d738c066e1ad36df5b35ab36cfb2772723f3ba2e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s + +;; This file contains specific tests for the lp64s ABI. 
+ +define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { +; CHECK-LABEL: callee_float_in_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: move $fp, $a0 +; CHECK-NEXT: bstrpick.d $a0, $a1, 31, 0 +; CHECK-NEXT: bl %plt(__fixsfdi) +; CHECK-NEXT: add.d $a0, $fp, $a0 +; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %b_fptosi = fptosi float %b to i64 + %1 = add i64 %a, %b_fptosi + ret i64 %1 +} + +define i64 @caller_float_in_regs() nounwind { +; CHECK-LABEL: caller_float_in_regs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: lu12i.w $a1, 262144 +; CHECK-NEXT: bl %plt(callee_float_in_regs) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) + ret i64 %1 +} + +define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { +; CHECK-LABEL: callee_float_on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $a0, $sp, 0 +; CHECK-NEXT: ret + %1 = trunc i128 %d to i64 + %2 = bitcast float %e to i32 + %3 = sext i32 %2 to i64 + %4 = add i64 %1, %3 + ret i64 %3 +} + +define i64 @caller_float_on_stack() nounwind { +; CHECK-LABEL: caller_float_on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: lu12i.w $a0, 264704 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: ori $a2, $zero, 2 +; CHECK-NEXT: ori $a4, $zero, 3 +; CHECK-NEXT: ori $a6, $zero, 4 +; CHECK-NEXT: move $a1, $zero +; CHECK-NEXT: move $a3, $zero +; CHECK-NEXT: move $a5, $zero +; CHECK-NEXT: move $a7, $zero +; 
CHECK-NEXT: bl %plt(callee_float_on_stack) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) + ret i64 %1 +} + +define float @callee_tiny_scalar_ret() nounwind { +; CHECK-LABEL: callee_tiny_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $a0, 260096 +; CHECK-NEXT: ret + ret float 1.0 +} + +define i64 @caller_tiny_scalar_ret() nounwind { +; CHECK-LABEL: caller_tiny_scalar_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_tiny_scalar_ret) +; CHECK-NEXT: addi.w $a0, $a0, 0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call float @callee_tiny_scalar_ret() + %2 = bitcast float %1 to i32 + %3 = sext i32 %2 to i64 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll b/llvm/test/CodeGen/LoongArch/codemodel-medium.ll new file mode 100644 index 0000000000000000000000000000000000000000..d4d97e7df804ddd059012f613278257651efb4bd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/codemodel-medium.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --code-model=small < %s | \ +; RUN: FileCheck --check-prefix=SMALL %s +; RUN: llc --mtriple=loongarch64 --code-model=medium < %s | \ +; RUN: FileCheck --check-prefix=MEDIUM %s + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +declare i32 @callee(i32) + +define i32 @call_globaladdress(i32 %a) nounwind { +; SMALL-LABEL: call_globaladdress: +; SMALL: # %bb.0: +; SMALL-NEXT: addi.d $sp, $sp, -16 +; SMALL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; SMALL-NEXT: bl %plt(callee) +; SMALL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; SMALL-NEXT: addi.d $sp, $sp, 16 +; SMALL-NEXT: ret +; +; MEDIUM-LABEL: call_globaladdress: +; 
MEDIUM: # %bb.0: +; MEDIUM-NEXT: addi.d $sp, $sp, -16 +; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(callee) +; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(callee) +; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM-NEXT: addi.d $sp, $sp, 16 +; MEDIUM-NEXT: ret + %1 = call i32 @callee(i32 %a) + ret i32 %1 +} + +define void @call_external_sym(ptr %dst) { +; SMALL-LABEL: call_external_sym: +; SMALL: # %bb.0: # %entry +; SMALL-NEXT: addi.d $sp, $sp, -16 +; SMALL-NEXT: .cfi_def_cfa_offset 16 +; SMALL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; SMALL-NEXT: .cfi_offset 1, -8 +; SMALL-NEXT: ori $a2, $zero, 1000 +; SMALL-NEXT: move $a1, $zero +; SMALL-NEXT: bl %plt(memset) +; SMALL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; SMALL-NEXT: addi.d $sp, $sp, 16 +; SMALL-NEXT: ret +; +; MEDIUM-LABEL: call_external_sym: +; MEDIUM: # %bb.0: # %entry +; MEDIUM-NEXT: addi.d $sp, $sp, -16 +; MEDIUM-NEXT: .cfi_def_cfa_offset 16 +; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; MEDIUM-NEXT: .cfi_offset 1, -8 +; MEDIUM-NEXT: ori $a2, $zero, 1000 +; MEDIUM-NEXT: move $a1, $zero +; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(memset) +; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(memset) +; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM-NEXT: addi.d $sp, $sp, 16 +; MEDIUM-NEXT: ret +entry: + call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 1000, i1 false) + ret void +} + +;; Tail call with different codemodel. 
+declare i32 @callee_tail(i32 %i) +define i32 @caller_tail(i32 %i) nounwind { +; SMALL-LABEL: caller_tail: +; SMALL: # %bb.0: # %entry +; SMALL-NEXT: b %plt(callee_tail) +; +; MEDIUM-LABEL: caller_tail: +; MEDIUM: # %bb.0: # %entry +; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) +; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) +entry: + %r = tail call i32 @callee_tail(i32 %i) + ret i32 %r +} diff --git a/llvm/test/CodeGen/LoongArch/cpu-name-generic.ll b/llvm/test/CodeGen/LoongArch/cpu-name-generic.ll new file mode 100644 index 0000000000000000000000000000000000000000..1129d9fcb2542c9e4b38d1299a7128e7c6f8a956 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/cpu-name-generic.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mcpu=generic < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch32 --mcpu=generic-la32 < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mcpu=generic < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 --mcpu=generic-la64 < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +;; The CPU name "generic" should map to the corresponding concrete names +;; according to the target triple's bitness. 
+define i64 @f(i64 signext %a, i64 signext %b) { +; LA32-LABEL: f: +; LA32: # %bb.0: +; LA32-NEXT: add.w $a1, $a1, $a3 +; LA32-NEXT: add.w $a2, $a0, $a2 +; LA32-NEXT: sltu $a0, $a2, $a0 +; LA32-NEXT: add.w $a1, $a1, $a0 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: f: +; LA64: # %bb.0: +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: ret + %1 = add nsw i64 %a, %b + ret i64 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5435fb9050066cb5e01cd94ade3c446c1182657 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s --mtriple=loongarch64 --mattr=+64bit --mcpu=invalidcpu 2>&1 | FileCheck %s + +; CHECK: {{.*}} is not a recognized processor for this target + +define void @f() { + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll new file mode 100644 index 0000000000000000000000000000000000000000..35945ae4de71fb5decca5c5516aecee09a8757f3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/cpus.ll @@ -0,0 +1,20 @@ +;; This tests that llc accepts all valid LoongArch CPUs. +;; Note the 'generic' names have been tested in cpu-name-generic.ll. 
+ +; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s +; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s +; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s + +; CHECK-NOT: {{.*}} is not a recognized processor for this target + +define void @f() { + ret void +} + +define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { + ret void +} + +define void @tune_cpu_la464() "tune-cpu"="la464" { + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll new file mode 100644 index 0000000000000000000000000000000000000000..fa4fda9b8972bf18249121f0c307cf37402e9ced --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll @@ -0,0 +1,514 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +declare i8 @llvm.ctlz.i8(i8, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i8 @llvm.ctpop.i8(i8) +declare i16 @llvm.ctpop.i16(i16) +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) +declare i8 @llvm.cttz.i8(i8, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) + +define i8 @test_ctlz_i8(i8 %a) nounwind { +; LA32-LABEL: test_ctlz_i8: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, -24 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctlz_i8: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: clz.d $a0, $a0 +; LA64-NEXT: addi.d $a0, $a0, -56 +; LA64-NEXT: ret + %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false) + ret i8 %tmp +} + +define i16 @test_ctlz_i16(i16 %a) nounwind { +; LA32-LABEL: test_ctlz_i16: +; LA32: # %bb.0: +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; 
LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, -16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctlz_i16: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: clz.d $a0, $a0 +; LA64-NEXT: addi.d $a0, $a0, -48 +; LA64-NEXT: ret + %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false) + ret i16 %tmp +} + +define i32 @test_ctlz_i32(i32 %a) nounwind { +; LA32-LABEL: test_ctlz_i32: +; LA32: # %bb.0: +; LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctlz_i32: +; LA64: # %bb.0: +; LA64-NEXT: clz.w $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %tmp +} + +define i64 @test_ctlz_i64(i64 %a) nounwind { +; LA32-LABEL: test_ctlz_i64: +; LA32: # %bb.0: +; LA32-NEXT: sltu $a2, $zero, $a1 +; LA32-NEXT: clz.w $a1, $a1 +; LA32-NEXT: maskeqz $a1, $a1, $a2 +; LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, 32 +; LA32-NEXT: masknez $a0, $a0, $a2 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctlz_i64: +; LA64: # %bb.0: +; LA64-NEXT: clz.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %tmp +} + +define i8 @test_not_ctlz_i8(i8 %a) nounwind { +; LA32-LABEL: test_not_ctlz_i8: +; LA32: # %bb.0: +; LA32-NEXT: ori $a1, $zero, 255 +; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, -24 +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_ctlz_i8: +; LA64: # %bb.0: +; LA64-NEXT: ori $a1, $zero, 255 +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: clz.d $a0, $a0 +; LA64-NEXT: addi.d $a0, $a0, -56 +; LA64-NEXT: ret + %neg = xor i8 %a, -1 + %tmp = call i8 @llvm.ctlz.i8(i8 %neg, i1 false) + ret i8 %tmp +} + +define i16 @test_not_ctlz_i16(i16 %a) nounwind { +; LA32-LABEL: test_not_ctlz_i16: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: clz.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, -16 +; LA32-NEXT: ret +; +; LA64-LABEL: 
test_not_ctlz_i16: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: clz.d $a0, $a0 +; LA64-NEXT: addi.d $a0, $a0, -48 +; LA64-NEXT: ret + %neg = xor i16 %a, -1 + %tmp = call i16 @llvm.ctlz.i16(i16 %neg, i1 false) + ret i16 %tmp +} + +define i32 @test_not_ctlz_i32(i32 %a) nounwind { +; LA32-LABEL: test_not_ctlz_i32: +; LA32: # %bb.0: +; LA32-NEXT: clo.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_ctlz_i32: +; LA64: # %bb.0: +; LA64-NEXT: clo.w $a0, $a0 +; LA64-NEXT: ret + %neg = xor i32 %a, -1 + %tmp = call i32 @llvm.ctlz.i32(i32 %neg, i1 false) + ret i32 %tmp +} + +define i64 @test_not_ctlz_i64(i64 %a) nounwind { +; LA32-LABEL: test_not_ctlz_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a2, $a1, $zero +; LA32-NEXT: sltu $a2, $zero, $a2 +; LA32-NEXT: clo.w $a0, $a0 +; LA32-NEXT: addi.w $a0, $a0, 32 +; LA32-NEXT: masknez $a0, $a0, $a2 +; LA32-NEXT: clo.w $a1, $a1 +; LA32-NEXT: maskeqz $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_ctlz_i64: +; LA64: # %bb.0: +; LA64-NEXT: clo.d $a0, $a0 +; LA64-NEXT: ret + %neg = xor i64 %a, -1 + %tmp = call i64 @llvm.ctlz.i64(i64 %neg, i1 false) + ret i64 %tmp +} + +define i8 @test_ctpop_i8(i8 %a) nounwind { +; LA32-LABEL: test_ctpop_i8: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a1, $a0, 1 +; LA32-NEXT: andi $a1, $a1, 85 +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: andi $a1, $a0, 51 +; LA32-NEXT: srli.w $a0, $a0, 2 +; LA32-NEXT: andi $a0, $a0, 51 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: srli.w $a1, $a0, 4 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: andi $a0, $a0, 15 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctpop_i8: +; LA64: # %bb.0: +; LA64-NEXT: srli.d $a1, $a0, 1 +; LA64-NEXT: andi $a1, $a1, 85 +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: andi $a1, $a0, 51 +; LA64-NEXT: srli.d $a0, $a0, 2 +; LA64-NEXT: andi $a0, $a0, 51 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: srli.d $a1, 
$a0, 4 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: andi $a0, $a0, 15 +; LA64-NEXT: ret + %1 = call i8 @llvm.ctpop.i8(i8 %a) + ret i8 %1 +} + +define i16 @test_ctpop_i16(i16 %a) nounwind { +; LA32-LABEL: test_ctpop_i16: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 5 +; LA32-NEXT: ori $a1, $a1, 1365 +; LA32-NEXT: srli.w $a2, $a0, 1 +; LA32-NEXT: and $a1, $a2, $a1 +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: lu12i.w $a1, 3 +; LA32-NEXT: ori $a1, $a1, 819 +; LA32-NEXT: and $a2, $a0, $a1 +; LA32-NEXT: srli.w $a0, $a0, 2 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: add.w $a0, $a2, $a0 +; LA32-NEXT: srli.w $a1, $a0, 4 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8 +; LA32-NEXT: andi $a0, $a0, 15 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctpop_i16: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 5 +; LA64-NEXT: ori $a1, $a1, 1365 +; LA64-NEXT: srli.d $a2, $a0, 1 +; LA64-NEXT: and $a1, $a2, $a1 +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 3 +; LA64-NEXT: ori $a1, $a1, 819 +; LA64-NEXT: and $a2, $a0, $a1 +; LA64-NEXT: srli.d $a0, $a0, 2 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: srli.d $a1, $a0, 4 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8 +; LA64-NEXT: andi $a0, $a0, 15 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: ret + %1 = call i16 @llvm.ctpop.i16(i16 %a) + ret i16 %1 +} + +define i32 @test_ctpop_i32(i32 %a) nounwind { +; LA32-LABEL: test_ctpop_i32: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 349525 +; LA32-NEXT: ori $a1, $a1, 1365 +; LA32-NEXT: srli.w $a2, $a0, 1 +; LA32-NEXT: and $a1, $a2, $a1 +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: lu12i.w $a1, 209715 +; LA32-NEXT: ori $a1, $a1, 819 +; LA32-NEXT: and $a2, $a0, $a1 +; LA32-NEXT: srli.w $a0, $a0, 2 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: add.w $a0, $a2, $a0 +; LA32-NEXT: srli.w $a1, $a0, 4 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: lu12i.w $a1, 61680 
+; LA32-NEXT: ori $a1, $a1, 3855 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: lu12i.w $a1, 4112 +; LA32-NEXT: ori $a1, $a1, 257 +; LA32-NEXT: mul.w $a0, $a0, $a1 +; LA32-NEXT: srli.w $a0, $a0, 24 +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctpop_i32: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 349525 +; LA64-NEXT: ori $a1, $a1, 1365 +; LA64-NEXT: srli.d $a2, $a0, 1 +; LA64-NEXT: and $a1, $a2, $a1 +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 209715 +; LA64-NEXT: ori $a1, $a1, 819 +; LA64-NEXT: and $a2, $a0, $a1 +; LA64-NEXT: srli.d $a0, $a0, 2 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: srli.d $a1, $a0, 4 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 61680 +; LA64-NEXT: ori $a1, $a1, 3855 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 4112 +; LA64-NEXT: ori $a1, $a1, 257 +; LA64-NEXT: mul.d $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24 +; LA64-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + +define i64 @test_ctpop_i64(i64 %a) nounwind { +; LA32-LABEL: test_ctpop_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 349525 +; LA32-NEXT: ori $a2, $a2, 1365 +; LA32-NEXT: srli.w $a3, $a0, 1 +; LA32-NEXT: and $a3, $a3, $a2 +; LA32-NEXT: sub.w $a0, $a0, $a3 +; LA32-NEXT: lu12i.w $a3, 209715 +; LA32-NEXT: ori $a3, $a3, 819 +; LA32-NEXT: and $a4, $a0, $a3 +; LA32-NEXT: srli.w $a0, $a0, 2 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: add.w $a0, $a4, $a0 +; LA32-NEXT: srli.w $a4, $a1, 1 +; LA32-NEXT: and $a2, $a4, $a2 +; LA32-NEXT: sub.w $a1, $a1, $a2 +; LA32-NEXT: srli.w $a2, $a0, 4 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: and $a2, $a1, $a3 +; LA32-NEXT: srli.w $a1, $a1, 2 +; LA32-NEXT: and $a1, $a1, $a3 +; LA32-NEXT: add.w $a1, $a2, $a1 +; LA32-NEXT: srli.w $a2, $a1, 4 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: lu12i.w $a2, 61680 +; LA32-NEXT: ori $a2, $a2, 3855 +; LA32-NEXT: and $a1, $a1, $a2 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: lu12i.w $a2, 4112 +; 
LA32-NEXT: ori $a2, $a2, 257 +; LA32-NEXT: mul.w $a0, $a0, $a2 +; LA32-NEXT: mul.w $a1, $a1, $a2 +; LA32-NEXT: srli.w $a1, $a1, 24 +; LA32-NEXT: srli.w $a0, $a0, 24 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: test_ctpop_i64: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 349525 +; LA64-NEXT: ori $a1, $a1, 1365 +; LA64-NEXT: lu32i.d $a1, 349525 +; LA64-NEXT: lu52i.d $a1, $a1, 1365 +; LA64-NEXT: srli.d $a2, $a0, 1 +; LA64-NEXT: and $a1, $a2, $a1 +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 209715 +; LA64-NEXT: ori $a1, $a1, 819 +; LA64-NEXT: lu32i.d $a1, 209715 +; LA64-NEXT: lu52i.d $a1, $a1, 819 +; LA64-NEXT: and $a2, $a0, $a1 +; LA64-NEXT: srli.d $a0, $a0, 2 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: add.d $a0, $a2, $a0 +; LA64-NEXT: srli.d $a1, $a0, 4 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 61680 +; LA64-NEXT: ori $a1, $a1, 3855 +; LA64-NEXT: lu32i.d $a1, -61681 +; LA64-NEXT: lu52i.d $a1, $a1, 240 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: lu12i.w $a1, 4112 +; LA64-NEXT: ori $a1, $a1, 257 +; LA64-NEXT: lu32i.d $a1, 65793 +; LA64-NEXT: lu52i.d $a1, $a1, 16 +; LA64-NEXT: mul.d $a0, $a0, $a1 +; LA64-NEXT: srli.d $a0, $a0, 56 +; LA64-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %1 +} + +define i8 @test_cttz_i8(i8 %a) nounwind { +; LA32-LABEL: test_cttz_i8: +; LA32: # %bb.0: +; LA32-NEXT: ori $a0, $a0, 256 +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_cttz_i8: +; LA64: # %bb.0: +; LA64-NEXT: ori $a0, $a0, 256 +; LA64-NEXT: ctz.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) + ret i8 %tmp +} + +define i16 @test_cttz_i16(i16 %a) nounwind { +; LA32-LABEL: test_cttz_i16: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 16 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_cttz_i16: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 16 +; LA64-NEXT: or $a0, $a0, $a1 +; 
LA64-NEXT: ctz.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) + ret i16 %tmp +} + +define i32 @test_cttz_i32(i32 %a) nounwind { +; LA32-LABEL: test_cttz_i32: +; LA32: # %bb.0: +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_cttz_i32: +; LA64: # %bb.0: +; LA64-NEXT: ctz.w $a0, $a0 +; LA64-NEXT: ret + %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %tmp +} + +define i64 @test_cttz_i64(i64 %a) nounwind { +; LA32-LABEL: test_cttz_i64: +; LA32: # %bb.0: +; LA32-NEXT: sltu $a2, $zero, $a0 +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: maskeqz $a0, $a0, $a2 +; LA32-NEXT: ctz.w $a1, $a1 +; LA32-NEXT: addi.w $a1, $a1, 32 +; LA32-NEXT: masknez $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: test_cttz_i64: +; LA64: # %bb.0: +; LA64-NEXT: ctz.d $a0, $a0 +; LA64-NEXT: ret + %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %tmp +} + +define i8 @test_not_cttz_i8(i8 %a) nounwind { +; LA32-LABEL: test_not_cttz_i8: +; LA32: # %bb.0: +; LA32-NEXT: ori $a1, $zero, 256 +; LA32-NEXT: orn $a0, $a1, $a0 +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_cttz_i8: +; LA64: # %bb.0: +; LA64-NEXT: ori $a1, $zero, 256 +; LA64-NEXT: orn $a0, $a1, $a0 +; LA64-NEXT: ctz.d $a0, $a0 +; LA64-NEXT: ret + %neg = xor i8 %a, -1 + %tmp = call i8 @llvm.cttz.i8(i8 %neg, i1 false) + ret i8 %tmp +} + +define i16 @test_not_cttz_i16(i16 %a) nounwind { +; LA32-LABEL: test_not_cttz_i16: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 16 +; LA32-NEXT: orn $a0, $a1, $a0 +; LA32-NEXT: ctz.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_cttz_i16: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 16 +; LA64-NEXT: orn $a0, $a1, $a0 +; LA64-NEXT: ctz.d $a0, $a0 +; LA64-NEXT: ret + %neg = xor i16 %a, -1 + %tmp = call i16 @llvm.cttz.i16(i16 %neg, i1 false) + ret i16 %tmp +} + +define i32 @test_not_cttz_i32(i32 %a) nounwind { +; LA32-LABEL: test_not_cttz_i32: +; 
LA32: # %bb.0: +; LA32-NEXT: cto.w $a0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_cttz_i32: +; LA64: # %bb.0: +; LA64-NEXT: cto.w $a0, $a0 +; LA64-NEXT: ret + %neg = xor i32 %a, -1 + %tmp = call i32 @llvm.cttz.i32(i32 %neg, i1 false) + ret i32 %tmp +} + +define i64 @test_not_cttz_i64(i64 %a) nounwind { +; LA32-LABEL: test_not_cttz_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a2, $a0, $zero +; LA32-NEXT: sltu $a2, $zero, $a2 +; LA32-NEXT: cto.w $a1, $a1 +; LA32-NEXT: addi.w $a1, $a1, 32 +; LA32-NEXT: masknez $a1, $a1, $a2 +; LA32-NEXT: cto.w $a0, $a0 +; LA32-NEXT: maskeqz $a0, $a0, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: test_not_cttz_i64: +; LA64: # %bb.0: +; LA64-NEXT: cto.d $a0, $a0 +; LA64-NEXT: ret + %neg = xor i64 %a, -1 + %tmp = call i64 @llvm.cttz.i64(i64 %neg, i1 false) + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/double-br-fcmp.ll b/llvm/test/CodeGen/LoongArch/double-br-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..8006b67227b6e0f93bd0b10c7100d3023afd1f5b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-br-fcmp.ll @@ -0,0 +1,985 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare void @abort() + +define void @br_fcmp_oeq_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_oeq_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB0_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oeq_bcnez: +; LA64: # %bb.0: +; 
LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB0_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oeq double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oeq_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_oeq_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB1_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB1_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oeq_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB1_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB1_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oeq double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ogt_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ogt_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB2_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: 
ret +; LA32-NEXT: .LBB2_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ogt_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB2_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB2_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ogt double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ogt_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ogt_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB3_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB3_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ogt_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB3_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB3_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ogt double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_oge_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_oge_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB4_2 +; 
LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB4_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oge_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB4_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB4_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oge double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oge_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_oge_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB5_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB5_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oge_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB5_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB5_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oge double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_olt_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_olt_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; 
LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB6_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB6_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_olt_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB6_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB6_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp olt double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_olt_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_olt_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB7_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB7_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_olt_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB7_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB7_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp olt double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void 
@br_fcmp_ole_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ole_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB8_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB8_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ole_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB8_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB8_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ole double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ole_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ole_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB9_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB9_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ole_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB9_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB9_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ole double %a, %b + br i1 
%1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_one_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_one_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB10_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB10_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_one_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB10_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB10_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp one double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_one_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_one_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB11_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB11_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_one_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB11_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d 
$sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB11_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp one double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ord_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ord_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB12_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB12_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ord_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB12_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB12_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ord double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ord_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ord_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB13_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB13_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ord_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: 
bceqz $fcc0, .LBB13_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB13_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ord double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ueq_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ueq_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB14_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB14_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ueq_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB14_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB14_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ueq double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ueq_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ueq_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB15_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB15_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ueq_bceqz: +; LA64: # %bb.0: +; 
LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB15_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB15_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ueq double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ugt_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ugt_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB16_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB16_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ugt_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB16_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB16_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ugt double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ugt_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ugt_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB17_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; 
LA32-NEXT: ret +; LA32-NEXT: .LBB17_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ugt_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB17_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB17_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ugt double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_uge_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_uge_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB18_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB18_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uge_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB18_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB18_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uge double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uge_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_uge_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, 
.LBB19_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB19_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uge_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB19_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB19_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uge double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ult_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ult_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB20_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB20_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ult_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB20_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB20_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ult double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ult_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ult_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, 
$sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB21_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB21_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ult_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB21_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB21_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ult double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ule_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ule_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB22_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB22_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ule_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB22_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB22_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ule double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define 
void @br_fcmp_ule_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_ule_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB23_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB23_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ule_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB23_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB23_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ule double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_une_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_une_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB24_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB24_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_une_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB24_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB24_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp une double 
%a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_une_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_une_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB25_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB25_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_une_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB25_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB25_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp une double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_uno_bcnez(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_uno_bcnez: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB26_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB26_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uno_bcnez: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB26_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; 
LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB26_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uno double %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uno_bceqz(double %a, double %b) nounwind { +; LA32-LABEL: br_fcmp_uno_bceqz: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB27_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB27_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uno_bceqz: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB27_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB27_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uno double %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll b/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll new file mode 100644 index 0000000000000000000000000000000000000000..066f60752e2a7590e32eea082f9ff2adb17ebb12 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + +define i32 
@fcmp_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata 
!"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 
@llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d 
$fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll b/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll new file mode 100644 index 0000000000000000000000000000000000000000..c8974fb946222a8c930180bf707e1f7864f120d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, 
double, metadata, metadata) + +define i32 @fcmps_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.seq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.seq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 
@llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ueq: +; LA64: # %bb.0: +; LA64-NEXT: 
fcmp.sueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: 
movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uno(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: 
fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + 
+define i32 @fcmp_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", 
metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call 
i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll new file mode 100644 index 0000000000000000000000000000000000000000..6dd62847943375325952d10159ca8468f864ff04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-fma.ll @@ -0,0 +1,887 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+d --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+d --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-OFF +; RUN: llc --mtriple=loongarch64 --mattr=+d --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+d --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+d --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-OFF + +define double @fmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, 
$fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul double %a, %b + %add = fadd double %mul, %c + ret double %add +} + +define double @fmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, 
$fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul double %a, %b + %sub = fsub double %mul, %c + ret double %sub +} + +define double @fnmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fadd.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul double %a, %b + %add = fadd double %mul, %c + %negadd = fneg double %add + ret double %negadd +} + +define double @fnmadd_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_d_nsz: +; 
LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz double %a + %negc = fneg nsz double %c + %mul = fmul nsz double %nega, %b + %add = fadd nsz double %mul, %negc + ret double %add +} + +;; Check that fnmadd.d is not emitted. 
+define double @not_fnmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a + %negc = fneg double %c + %mul = fmul double %nega, %b + %add = fadd double %mul, %negc + ret double %add +} + +define double @fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; 
LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a + %mul = fmul double %nega, %b + %add = fadd double %mul, %c + ret double %add +} + +define double @contract_fmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.d 
$fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %c + ret double %add +} + +define double @contract_fmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %sub = fsub contract double %mul, %c + ret double %sub +} + +define double @contract_fnmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmadd_d: +; 
LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %c + %negadd = fneg contract double %add + ret double %negadd +} + +define double @contract_fnmadd_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmadd_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmadd_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmadd_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmadd_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmadd_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmadd_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz double %a + %negc = fneg contract nsz double %c + %mul = fmul contract nsz double %nega, %b + %add = fadd contract nsz double %mul, %negc + ret double %add +} + +;; Check that fnmadd.d is not emitted. 
+define double @not_contract_fnmadd_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_fnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract double %a + %negc = fneg contract double %c + %mul = fmul contract double %nega, %b + %add = fadd contract double %mul, %negc + ret double %add +} + +define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; 
LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract double %a + %mul = fmul contract double %nega, %b + %add = fadd contract double %mul, %c + ret double %add +} + +declare double @llvm.fma.f64(double, double, double) + +define double @fmadd_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_d_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_d_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_d_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_d_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmadd_d_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_d_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %fma = call double 
@llvm.fma.f64(double %a, double %b, double %c) + ret double %fma +} + +define double @fmsub_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_d_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_d_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_d_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_d_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_d_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_d_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg double %c + %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) + ret double %fma +} + +define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_d_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_d_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_d_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_d_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: 
fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_d_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_d_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %fma = call double @llvm.fma.f64(double %a, double %b, double %c) + %neg = fneg double %fma + ret double %neg +} + +define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_d_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_d_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_d_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_d_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_d_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_d_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a + %negc = fneg double %c + %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %negc) + ret double %fma +} + +;; Check that fnmadd.d is not emitted. 
+define double @not_fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmadd_d_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmadd_d_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmadd_d_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmadd_d_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmadd_d_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmadd_d_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a + %negc = fneg double %c + %fma = call double @llvm.fma.f64(double %nega, double %b, double %negc) + ret double %fma +} + +define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; 
LA32-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a + %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma +} + +define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negb = fneg double %b + %fma = call 
double @llvm.fma.f64(double %a, double %negb, double %c) + ret double %fma +} + +define double @fmadd_d_contract(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_d_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmadd_d_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %c + ret double %add +} + +define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_d_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; 
LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_d_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %sub = fsub contract double %mul, %c + ret double %sub +} + +define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_d_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_d_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %c + %negadd = fneg contract double %add + ret double %negadd +} + +define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: 
fnmsub_d_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b + %sub = fsub contract double %c, %mul + ret double %sub +} diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll index a7782cf85954dd1faf671426438e60ab9af8fb03..3e89db3ec5c8ccdb24dd7e44ce3cd26f25bed3aa 100644 --- a/llvm/test/CodeGen/LoongArch/double-imm.ll +++ b/llvm/test/CodeGen/LoongArch/double-imm.ll @@ -7,12 +7,12 @@ define double @f64_positive_zero() nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $zero ; LA32-NEXT: movgr2frh.w $fa0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_positive_zero: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret double 0.0 } @@ -22,30 +22,30 @@ define double @f64_negative_zero() nounwind { ; LA32-NEXT: movgr2fr.w $fa0, $zero ; LA32-NEXT: movgr2frh.w $fa0, $zero ; LA32-NEXT: fneg.d $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_negative_zero: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa0, $zero ; LA64-NEXT: fneg.d $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret double -0.0 } define double 
@f64_constant_pi() nounwind { ; LA32-LABEL: f64_constant_pi: ; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, .LCPI2_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI2_0) ; LA32-NEXT: fld.d $fa0, $a0, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_constant_pi: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, .LCPI2_0 -; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0) ; LA64-NEXT: fld.d $fa0, $a0, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret double 3.1415926535897931159979634685441851615905761718750 } @@ -57,7 +57,7 @@ define double @f64_add_fimm1(double %a) nounwind { ; LA32-NEXT: ffint.s.w $fa1, $fa1 ; LA32-NEXT: fcvt.d.s $fa1, $fa1 ; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_add_fimm1: ; LA64: # %bb.0: @@ -65,7 +65,7 @@ define double @f64_add_fimm1(double %a) nounwind { ; LA64-NEXT: movgr2fr.d $fa1, $a0 ; LA64-NEXT: ffint.d.l $fa1, $fa1 ; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fadd double %a, 1.0 ret double %1 } @@ -77,13 +77,13 @@ define double @f64_positive_fimm1() nounwind { ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.s.w $fa0, $fa0 ; LA32-NEXT: fcvt.d.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_positive_fimm1: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $zero, 1 ; LA64-NEXT: movgr2fr.d $fa0, $a0 ; LA64-NEXT: ffint.d.l $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret double 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll b/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll new file mode 100644 index 0000000000000000000000000000000000000000..59bc9aa4566410b2829333110ebeadbfd1e396aa --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +;; Perform tail call optimization for duplicate returns. +declare i32 @test() +declare i32 @test1() +declare i32 @test2() +declare i32 @test3() +define i32 @duplicate_returns(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: duplicate_returns: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bstrpick.d $a2, $a0, 31, 0 +; CHECK-NEXT: beqz $a2, .LBB0_4 +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: bstrpick.d $a2, $a1, 31, 0 +; CHECK-NEXT: beqz $a2, .LBB0_5 +; CHECK-NEXT: # %bb.2: # %if.else2 +; CHECK-NEXT: addi.w $a0, $a0, 0 +; CHECK-NEXT: addi.w $a1, $a1, 0 +; CHECK-NEXT: bge $a1, $a0, .LBB0_6 +; CHECK-NEXT: # %bb.3: # %if.then3 +; CHECK-NEXT: b %plt(test2) +; CHECK-NEXT: .LBB0_4: # %if.then +; CHECK-NEXT: b %plt(test) +; CHECK-NEXT: .LBB0_5: # %if.then2 +; CHECK-NEXT: b %plt(test1) +; CHECK-NEXT: .LBB0_6: # %if.else3 +; CHECK-NEXT: b %plt(test3) +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %call = tail call i32 @test() + br label %return + +if.else: + %cmp1 = icmp eq i32 %b, 0 + br i1 %cmp1, label %if.then2, label %if.else2 + +if.then2: + %call1 = tail call i32 @test1() + br label %return + +if.else2: + %cmp5 = icmp sgt i32 %a, %b + br i1 %cmp5, label %if.then3, label %if.else3 + +if.then3: + %call2 = tail call i32 @test2() + br label %return + +if.else3: + %call3 = tail call i32 @test3() + br label %return + +return: + %retval = phi i32 [ %call, %if.then ], [ %call1, %if.then2 ], [ %call2, %if.then3 ], [ %call3, %if.else3 ] + ret i32 %retval +} diff --git a/llvm/test/CodeGen/LoongArch/dwarf-eh.ll b/llvm/test/CodeGen/LoongArch/dwarf-eh.ll new file mode 100644 index 0000000000000000000000000000000000000000..f4e347e07de546992e6145e8abed92b0ea4efcc5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/dwarf-eh.ll @@ -0,0 
+1,54 @@ +; RUN: llc --mtriple=loongarch32 --relocation-model=static < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --relocation-model=static < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s + +declare void @throw_exception() + +declare i32 @__gxx_personality_v0(...) + +declare ptr @__cxa_begin_catch(ptr) + +declare void @__cxa_end_catch() + +; CHECK-LABEL: test1: +; CHECK: .cfi_startproc +;; PersonalityEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_personality 155, DW.ref.__gxx_personality_v0 +;; LSDAEncoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_lsda 27, .Lexception0 + +define void @test1() personality ptr @__gxx_personality_v0 { +entry: + invoke void @throw_exception() to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void +} + +; CHECK-LABEL: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +;; TTypeEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 +; CHECK: .Lttbaseref0: +;; CallSiteEncoding = dwarf::DW_EH_PE_uleb128 +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; CHECK-NEXT: .uleb128 .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2 +; CHECK-NEXT: .byte 1 # On action: 1 +; CHECK-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << +; CHECK-NEXT: .uleb128 .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 +; CHECK-NEXT: .byte 0 
# has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll new file mode 100644 index 0000000000000000000000000000000000000000..c004d1f9cdf4d4dacb28b33acdb67cad95258d4e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/e_flags.ll @@ -0,0 +1,34 @@ +; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32 +; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines + +; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32s -o %t-ilp32s +; RUN: llvm-readelf -h %t-ilp32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines + +; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32f -o %t-ilp32f +; RUN: llvm-readelf -h %t-ilp32f | FileCheck %s --check-prefixes=ILP32,ABI-F --match-full-lines + +; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d +; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines + +; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64 +; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines + +; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64s -o %t-lp64s +; RUN: llvm-readelf -h %t-lp64s | FileCheck %s --check-prefixes=LP64,ABI-S --match-full-lines + +; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64f -o %t-lp64f +; RUN: llvm-readelf -h %t-lp64f | FileCheck %s --check-prefixes=LP64,ABI-F --match-full-lines + +; RUN: llc --mtriple=loongarch64 --filetype=obj %s --mattr=+d --target-abi=lp64d -o %t-lp64d +; RUN: llvm-readelf -h %t-lp64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines + +; LP64: Class: ELF64 +; ILP32: Class: ELF32 + +; ABI-S: Flags: 0x41, SOFT-FLOAT, OBJ-v1 +; ABI-F: Flags: 0x42, SINGLE-FLOAT, OBJ-v1 +; ABI-D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 + +define void @foo() { + ret void +} diff --git 
a/llvm/test/CodeGen/LoongArch/eh-dwarf-cfa.ll b/llvm/test/CodeGen/LoongArch/eh-dwarf-cfa.ll new file mode 100644 index 0000000000000000000000000000000000000000..796ada3a1a0248602cbb8874d3f187ff275cc9c7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/eh-dwarf-cfa.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s + +define void @dwarf() { +; LA32-LABEL: dwarf: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 16 +; LA32-NEXT: bl %plt(foo) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: dwarf: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 16 +; LA64-NEXT: bl %plt(foo) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %0 = call ptr @llvm.eh.dwarf.cfa(i32 0) + call void @foo(ptr %0) + ret void +} + +declare void @foo(ptr) + +declare ptr @llvm.eh.dwarf.cfa(i32) nounwind diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll new file mode 100644 index 0000000000000000000000000000000000000000..08426b07bf74ba2688b20d336dddfd93ead9236a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s + +@var = external global i32 + +define void @func() { +; CHECK-LABEL: func: +; CHECK: # 
%bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 4112 +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(var) +; CHECK-NEXT: ld.d $a1, $a0, %got_pc_lo12(var) +; CHECK-NEXT: ld.w $t8, $a1, 0 +; CHECK-NEXT: ld.w $t7, $a1, 0 +; CHECK-NEXT: ld.w $t6, $a1, 0 +; CHECK-NEXT: ld.w $t5, $a1, 0 +; CHECK-NEXT: ld.w $t4, $a1, 0 +; CHECK-NEXT: ld.w $t3, $a1, 0 +; CHECK-NEXT: ld.w $t2, $a1, 0 +; CHECK-NEXT: ld.w $t1, $a1, 0 +; CHECK-NEXT: ld.w $t0, $a1, 0 +; CHECK-NEXT: ld.w $a7, $a1, 0 +; CHECK-NEXT: ld.w $a6, $a1, 0 +; CHECK-NEXT: ld.w $a5, $a1, 0 +; CHECK-NEXT: ld.w $a4, $a1, 0 +; CHECK-NEXT: ld.w $a3, $a1, 0 +; CHECK-NEXT: ld.w $a2, $a1, 0 +; CHECK-NEXT: ld.w $a0, $a1, 0 +; CHECK-NEXT: st.d $fp, $sp, 0 +; CHECK-NEXT: lu12i.w $fp, 1 +; CHECK-NEXT: ori $fp, $fp, 12 +; CHECK-NEXT: add.d $fp, $sp, $fp +; CHECK-NEXT: st.w $t8, $fp, 0 +; CHECK-NEXT: ld.d $fp, $sp, 0 +; CHECK-NEXT: st.w $t8, $a1, 0 +; CHECK-NEXT: st.w $t7, $a1, 0 +; CHECK-NEXT: st.w $t6, $a1, 0 +; CHECK-NEXT: st.w $t5, $a1, 0 +; CHECK-NEXT: st.w $t4, $a1, 0 +; CHECK-NEXT: st.w $t3, $a1, 0 +; CHECK-NEXT: st.w $t2, $a1, 0 +; CHECK-NEXT: st.w $t1, $a1, 0 +; CHECK-NEXT: st.w $t0, $a1, 0 +; CHECK-NEXT: st.w $a7, $a1, 0 +; CHECK-NEXT: st.w $a6, $a1, 0 +; CHECK-NEXT: st.w $a5, $a1, 0 +; CHECK-NEXT: st.w $a4, $a1, 0 +; CHECK-NEXT: st.w $a3, $a1, 0 +; CHECK-NEXT: st.w $a2, $a1, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: lu12i.w $a0, 1 +; CHECK-NEXT: ori $a0, $a0, 16 +; CHECK-NEXT: add.d $sp, $sp, $a0 +; CHECK-NEXT: ret + %space = alloca i32, align 4 + %stackspace = alloca[1024 x i32], align 4 + + ;; Load values to increase register pressure. 
+ %v0 = load volatile i32, ptr @var + %v1 = load volatile i32, ptr @var + %v2 = load volatile i32, ptr @var + %v3 = load volatile i32, ptr @var + %v4 = load volatile i32, ptr @var + %v5 = load volatile i32, ptr @var + %v6 = load volatile i32, ptr @var + %v7 = load volatile i32, ptr @var + %v8 = load volatile i32, ptr @var + %v9 = load volatile i32, ptr @var + %v10 = load volatile i32, ptr @var + %v11 = load volatile i32, ptr @var + %v12 = load volatile i32, ptr @var + %v13 = load volatile i32, ptr @var + %v14 = load volatile i32, ptr @var + %v15 = load volatile i32, ptr @var + + ;; Computing a stack-relative values needs an additional register. + ;; We should get an emergency spill/reload for this. + store volatile i32 %v0, ptr %space + + ;; store values so they are used. + store volatile i32 %v0, ptr @var + store volatile i32 %v1, ptr @var + store volatile i32 %v2, ptr @var + store volatile i32 %v3, ptr @var + store volatile i32 %v4, ptr @var + store volatile i32 %v5, ptr @var + store volatile i32 %v6, ptr @var + store volatile i32 %v7, ptr @var + store volatile i32 %v8, ptr @var + store volatile i32 %v9, ptr @var + store volatile i32 %v10, ptr @var + store volatile i32 %v11, ptr @var + store volatile i32 %v12, ptr @var + store volatile i32 %v13, ptr @var + store volatile i32 %v14, ptr @var + store volatile i32 %v15, ptr @var + + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/emutls.ll b/llvm/test/CodeGen/LoongArch/emutls.ll new file mode 100644 index 0000000000000000000000000000000000000000..86e73ea8709635afe8210f6034cb8e40046ac072 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/emutls.ll @@ -0,0 +1,147 @@ +; OHOS_LOCAL begin +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --emulated-tls --relocation-model=pic < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --emulated-tls --relocation-model=pic < %s \ +; RUN: | FileCheck 
--check-prefix=LA64 %s + +@external_x = external thread_local global i32, align 8 +@y = thread_local global i8 7, align 2 +@internal_z = internal thread_local global i64 9, align 16 + +define ptr @get_external_x() nounwind { +; LA32-LABEL: get_external_x: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: pcalau12i $a0, %got_pc_hi20(__emutls_v.external_x) +; LA32-NEXT: ld.w $a0, $a0, %got_pc_lo12(__emutls_v.external_x) +; LA32-NEXT: bl %plt(__emutls_get_address) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: get_external_x: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcalau12i $a0, %got_pc_hi20(__emutls_v.external_x) +; LA64-NEXT: ld.d $a0, $a0, %got_pc_lo12(__emutls_v.external_x) +; LA64-NEXT: bl %plt(__emutls_get_address) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + ret ptr @external_x +} + +define ptr @get_y() nounwind { +; LA32-LABEL: get_y: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: pcalau12i $a0, %got_pc_hi20(__emutls_v.y) +; LA32-NEXT: ld.w $a0, $a0, %got_pc_lo12(__emutls_v.y) +; LA32-NEXT: bl %plt(__emutls_get_address) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: get_y: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcalau12i $a0, %got_pc_hi20(__emutls_v.y) +; LA64-NEXT: ld.d $a0, $a0, %got_pc_lo12(__emutls_v.y) +; LA64-NEXT: bl %plt(__emutls_get_address) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + ret ptr @y +} + +define ptr 
@get_internal_z() nounwind { +; LA32-LABEL: get_internal_z: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: pcalau12i $a0, %pc_hi20(__emutls_v.internal_z) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(__emutls_v.internal_z) +; LA32-NEXT: bl %plt(__emutls_get_address) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: get_internal_z: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcalau12i $a0, %pc_hi20(__emutls_v.internal_z) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(__emutls_v.internal_z) +; LA64-NEXT: bl %plt(__emutls_get_address) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + ret ptr @internal_z +} + +; UTC_ARGS: --disable + +; LA32: .data +; LA32-NEXT: .globl __emutls_v.y +; LA32-NEXT: .p2align 2 +; LA32-NEXT: __emutls_v.y: +; LA32-NEXT: .word 1 +; LA32-NEXT: .word 2 +; LA32-NEXT: .word 0 +; LA32-NEXT: .word __emutls_t.y + +; LA32: .section .rodata, +; LA32-NEXT: .globl __emutls_t.y +; LA32-NEXT: .p2align 1 +; LA32-NEXT: __emutls_t.y: +; LA32-NEXT: .byte 7 + +; LA32: .data +; LA32-NEXT: .p2align 2 +; LA32-NEXT: __emutls_v.internal_z: +; LA32-NEXT: .word 8 +; LA32-NEXT: .word 16 +; LA32-NEXT: .word 0 +; LA32-NEXT: .word __emutls_t.internal_z + +; LA32: .section .rodata, +; LA32-NEXT: .p2align 4 +; LA32-NEXT: __emutls_t.internal_z: +; LA32-NEXT: .dword 9 + +; LA64: .data +; LA64-NEXT: .globl __emutls_v.y +; LA64-NEXT: .p2align 3 +; LA64-NEXT: __emutls_v.y: +; LA64-NEXT: .dword 1 +; LA64-NEXT: .dword 2 +; LA64-NEXT: .dword 0 +; LA64-NEXT: .dword __emutls_t.y + +; LA64: .section .rodata, +; LA64-NEXT: .globl __emutls_t.y +; LA64-NEXT: .p2align 1 +; LA64-NEXT: __emutls_t.y: +; LA64-NEXT: .byte 7 + +; LA64: .data +; LA64-NEXT: .p2align 3 +; LA64-NEXT: __emutls_v.internal_z: 
+; LA64-NEXT: .dword 8 +; LA64-NEXT: .dword 16 +; LA64-NEXT: .dword 0 +; LA64-NEXT: .dword __emutls_t.internal_z + +; LA64: .section .rodata, +; LA64-NEXT: .p2align 4 +; LA64-NEXT: __emutls_t.internal_z: +; LA64-NEXT: .dword 9 +; OHOS_LOCAL end \ No newline at end of file diff --git a/llvm/test/CodeGen/LoongArch/exception-pointer-register.ll b/llvm/test/CodeGen/LoongArch/exception-pointer-register.ll new file mode 100644 index 0000000000000000000000000000000000000000..797c7e520f5bb99ffc4708fd3700146a1a234eb1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/exception-pointer-register.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare void @foo(ptr %p); +declare void @bar(ptr %p); +declare dso_local i32 @__gxx_personality_v0(...) + +;; Before getExceptionPointerRegister() and getExceptionSelectorRegister() +;; lowering hooks were defined this would trigger an assertion during live +;; variable analysis. 
+ +define void @caller(ptr %p) personality ptr @__gxx_personality_v0 { +; LA32-LABEL: caller: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 23, -12 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: beqz $a0, .LBB0_2 +; LA32-NEXT: # %bb.1: # %bb2 +; LA32-NEXT: .Ltmp0: +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl %plt(bar) +; LA32-NEXT: .Ltmp1: +; LA32-NEXT: b .LBB0_3 +; LA32-NEXT: .LBB0_2: # %bb1 +; LA32-NEXT: .Ltmp2: +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl %plt(foo) +; LA32-NEXT: .Ltmp3: +; LA32-NEXT: .LBB0_3: # %end2 +; LA32-NEXT: ld.w $s0, $sp, 4 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_4: # %lpad +; LA32-NEXT: .Ltmp4: +; LA32-NEXT: move $s0, $a0 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl callee +; LA32-NEXT: move $a0, $s0 +; LA32-NEXT: bl %plt(_Unwind_Resume) +; +; LA64-LABEL: caller: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: .cfi_def_cfa_offset 32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: .cfi_offset 23, -24 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: beqz $a0, .LBB0_2 +; LA64-NEXT: # %bb.1: # %bb2 +; LA64-NEXT: .Ltmp0: +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: bl %plt(bar) +; LA64-NEXT: .Ltmp1: +; LA64-NEXT: b .LBB0_3 +; LA64-NEXT: .LBB0_2: # %bb1 +; LA64-NEXT: .Ltmp2: +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: bl %plt(foo) +; LA64-NEXT: .Ltmp3: +; LA64-NEXT: .LBB0_3: # %end2 
+; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_4: # %lpad +; LA64-NEXT: .Ltmp4: +; LA64-NEXT: move $s0, $a0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: bl callee +; LA64-NEXT: move $a0, $s0 +; LA64-NEXT: bl %plt(_Unwind_Resume) +entry: + %0 = icmp eq ptr %p, null + br i1 %0, label %bb1, label %bb2 + +bb1: + invoke void @foo(ptr %p) to label %end1 unwind label %lpad + +bb2: + invoke void @bar(ptr %p) to label %end2 unwind label %lpad + +lpad: + %1 = landingpad { ptr, i32 } cleanup + call void @callee(ptr %p) + resume { ptr, i32 } %1 + +end1: + ret void + +end2: + ret void +} + +define internal void @callee(ptr %p) { +; LA32-LABEL: callee: +; LA32: # %bb.0: +; LA32-NEXT: ret +; +; LA64-LABEL: callee: +; LA64: # %bb.0: +; LA64-NEXT: ret + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll new file mode 100644 index 0000000000000000000000000000000000000000..86bf4292665b72c88b4ba5ab3cf056b9c327edda --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expand-call.ll @@ -0,0 +1,16 @@ +; RUN: llc --mtriple=loongarch64 --stop-before loongarch-prera-expand-pseudo \ +; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND +; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \ +; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND + +declare void @callee() + +define void @caller() nounwind { +; NOEXPAND-LABEL: name: caller +; NOEXPAND: PseudoCALL target-flags{{.*}}callee +; +; EXPAND-LABEL: name: caller +; EXPAND: BL target-flags{{.*}}callee + call void @callee() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/fabs.ll b/llvm/test/CodeGen/LoongArch/fabs.ll new file mode 100644 index 0000000000000000000000000000000000000000..3f3dacd9b71bc33563e13f0874ed2998872e53b4 --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/fabs.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) + +define float @fabs_f32(float %a) nounwind { +; LA32F-LABEL: fabs_f32: +; LA32F: # %bb.0: +; LA32F-NEXT: fabs.s $fa0, $fa0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fabs_f32: +; LA32D: # %bb.0: +; LA32D-NEXT: fabs.s $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fabs_f32: +; LA64F: # %bb.0: +; LA64F-NEXT: fabs.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fabs_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fabs.s $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call float @llvm.fabs.f32(float %a) + ret float %1 +} + +define double @fabs_f64(double %a) nounwind { +; LA32F-LABEL: fabs_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: bstrpick.w $a1, $a1, 30, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fabs_f64: +; LA32D: # %bb.0: +; LA32D-NEXT: fabs.d $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fabs_f64: +; LA64F: # %bb.0: +; LA64F-NEXT: bstrpick.d $a0, $a0, 62, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fabs_f64: +; LA64D: # %bb.0: +; LA64D-NEXT: fabs.d $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call double @llvm.fabs.f64(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/fcopysign.ll b/llvm/test/CodeGen/LoongArch/fcopysign.ll new file mode 100644 index 0000000000000000000000000000000000000000..181130d2c6a5f0e1ad0909b3094b15ea4be02a5f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fcopysign.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
--mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +define float @fcopysign_s(float %a, float %b) nounwind { +; LA32F-LABEL: fcopysign_s: +; LA32F: # %bb.0: +; LA32F-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fcopysign_s: +; LA32D: # %bb.0: +; LA32D-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fcopysign_s: +; LA64F: # %bb.0: +; LA64F-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fcopysign_s: +; LA64D: # %bb.0: +; LA64D-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +define double @fcopysign_d(double %a, double %b) nounwind { +; LA32F-LABEL: fcopysign_d: +; LA32F: # %bb.0: +; LA32F-NEXT: srli.w $a2, $a3, 31 +; LA32F-NEXT: bstrins.w $a1, $a2, 31, 31 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fcopysign_d: +; LA32D: # %bb.0: +; LA32D-NEXT: fcopysign.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fcopysign_d: +; LA64F: # %bb.0: +; LA64F-NEXT: srli.d $a1, $a1, 63 +; LA64F-NEXT: bstrins.d $a0, $a1, 63, 63 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fcopysign_d: +; LA64D: # %bb.0: +; LA64D-NEXT: fcopysign.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %1 = call double @llvm.copysign.f64(double %a, double %b) + ret double %1 +} + +define double @fold_promote_d_s(double %a, float %b) nounwind { +; LA32F-LABEL: fold_promote_d_s: +; LA32F: # %bb.0: +; LA32F-NEXT: movfr2gr.s $a2, $fa0 +; LA32F-NEXT: srli.w $a2, $a2, 31 +; LA32F-NEXT: bstrins.w $a1, $a2, 31, 31 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fold_promote_d_s: +; LA32D: # %bb.0: +; 
LA32D-NEXT: fcvt.d.s $fa1, $fa1 +; LA32D-NEXT: fcopysign.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fold_promote_d_s: +; LA64F: # %bb.0: +; LA64F-NEXT: lu12i.w $a1, -524288 +; LA64F-NEXT: lu32i.d $a1, 0 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: and $a1, $a2, $a1 +; LA64F-NEXT: slli.d $a1, $a1, 32 +; LA64F-NEXT: bstrins.d $a1, $a0, 62, 0 +; LA64F-NEXT: move $a0, $a1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fold_promote_d_s: +; LA64D: # %bb.0: +; LA64D-NEXT: fcvt.d.s $fa1, $fa1 +; LA64D-NEXT: fcopysign.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %c = fpext float %b to double + %t = call double @llvm.copysign.f64(double %a, double %c) + ret double %t +} + +define float @fold_demote_s_d(float %a, double %b) nounwind { +; LA32F-LABEL: fold_demote_s_d: +; LA32F: # %bb.0: +; LA32F-NEXT: movgr2fr.w $fa1, $a1 +; LA32F-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fold_demote_s_d: +; LA32D: # %bb.0: +; LA32D-NEXT: fcvt.s.d $fa1, $fa1 +; LA32D-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fold_demote_s_d: +; LA64F: # %bb.0: +; LA64F-NEXT: srli.d $a0, $a0, 32 +; LA64F-NEXT: movgr2fr.w $fa1, $a0 +; LA64F-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fold_demote_s_d: +; LA64D: # %bb.0: +; LA64D-NEXT: fcvt.s.d $fa1, $fa1 +; LA64D-NEXT: fcopysign.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %c = fptrunc double %b to float + %t = call float @llvm.copysign.f32(float %a, float %c) + ret float %t +} diff --git a/llvm/test/CodeGen/LoongArch/feature-32bit.ll b/llvm/test/CodeGen/LoongArch/feature-32bit.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef3cbd989503773c109d717a75d78fa9b63c6018 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/feature-32bit.ll @@ -0,0 +1,5 @@ +; RUN: llc --mtriple=loongarch64 --mattr=help 2>&1 | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=help 2>&1 | FileCheck %s + +; CHECK: Available features for this target: +; CHECK: 32bit - LA32 
Basic Integer and Privilege Instruction Set. diff --git a/llvm/test/CodeGen/LoongArch/float-br-fcmp.ll b/llvm/test/CodeGen/LoongArch/float-br-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..91bfe4ec9b78647c5f223b3a5dd6ed859d97f83c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-br-fcmp.ll @@ -0,0 +1,985 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 + +declare void @abort() + +define void @br_fcmp_oeq_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_oeq_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB0_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oeq_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB0_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oeq float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oeq_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_oeq_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB1_2 +; 
LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB1_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oeq_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB1_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB1_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oeq float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ogt_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ogt_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB2_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB2_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ogt_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB2_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB2_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ogt float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ogt_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ogt_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: 
addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB3_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB3_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ogt_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB3_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB3_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ogt float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_oge_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_oge_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB4_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB4_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oge_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB4_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB4_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oge float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + 
unreachable +} + +define void @br_fcmp_oge_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_oge_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB5_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB5_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_oge_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB5_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB5_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp oge float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_olt_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_olt_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB6_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB6_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_olt_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB6_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB6_2: # %if.then +; 
LA64-NEXT: bl %plt(abort) + %1 = fcmp olt float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_olt_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_olt_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB7_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB7_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_olt_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB7_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB7_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp olt float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ole_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ole_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB8_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB8_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ole_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB8_2 +; LA64-NEXT: # %bb.1: 
# %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB8_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ole float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ole_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ole_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB9_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB9_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ole_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB9_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB9_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ole float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_one_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_one_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB10_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB10_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_one_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, 
-16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB10_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB10_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp one float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_one_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_one_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB11_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB11_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_one_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB11_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB11_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp one float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ord_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ord_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB12_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; 
LA32-NEXT: ret +; LA32-NEXT: .LBB12_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ord_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB12_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB12_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ord float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ord_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ord_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB13_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB13_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ord_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB13_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB13_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ord float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ueq_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ueq_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 
+; LA32-NEXT: bcnez $fcc0, .LBB14_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB14_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ueq_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB14_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB14_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ueq float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ueq_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ueq_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB15_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB15_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ueq_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB15_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB15_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ueq float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ugt_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: 
br_fcmp_ugt_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB16_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB16_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ugt_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB16_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB16_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ugt float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ugt_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ugt_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB17_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB17_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ugt_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB17_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB17_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ugt float %a, %b + br i1 %1, label %if.then, label 
%if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_uge_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_uge_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB18_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB18_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uge_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, .LBB18_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB18_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uge float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uge_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_uge_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB19_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB19_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uge_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB19_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: 
addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB19_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uge float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ult_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ult_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB20_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB20_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ult_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB20_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB20_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ult float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ult_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ult_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB21_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB21_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ult_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: 
fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB21_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB21_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ult float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ule_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ule_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB22_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB22_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_ule_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB22_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB22_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ule float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ule_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_ule_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bceqz $fcc0, .LBB23_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB23_2: # %if.then +; LA32-NEXT: bl %plt(abort) 
+; +; LA64-LABEL: br_fcmp_ule_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: bceqz $fcc0, .LBB23_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB23_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp ule float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_une_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_une_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB24_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB24_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_une_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB24_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB24_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp une float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_une_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_une_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB25_2 +; LA32-NEXT: # %bb.1: # %if.else +; 
LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB25_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_une_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB25_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB25_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp une float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_uno_bcnez_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_uno_bcnez_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bcnez $fcc0, .LBB26_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB26_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uno_bcnez_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bcnez $fcc0, .LBB26_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB26_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uno float %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uno_bceqz_float(float %a, float %b) nounwind { +; LA32-LABEL: br_fcmp_uno_bceqz_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; 
LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: bceqz $fcc0, .LBB27_2 +; LA32-NEXT: # %bb.1: # %if.else +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; LA32-NEXT: .LBB27_2: # %if.then +; LA32-NEXT: bl %plt(abort) +; +; LA64-LABEL: br_fcmp_uno_bceqz_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: bceqz $fcc0, .LBB27_2 +; LA64-NEXT: # %bb.1: # %if.else +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +; LA64-NEXT: .LBB27_2: # %if.then +; LA64-NEXT: bl %plt(abort) + %1 = fcmp uno float %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll b/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll new file mode 100644 index 0000000000000000000000000000000000000000..0459d5019378f273837e68ff4ed91be327c6d9de --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +define i32 @fcmp_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata 
!"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; 
LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: 
+; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll b/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll new file mode 100644 index 0000000000000000000000000000000000000000..cad4d45c147ee44342975d25dc82b68ef481d5d2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +define i32 @fcmps_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.seq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.seq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oeq", metadata 
!"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 
@llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.s $fcc0, 
$fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ult(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret 
+; +; LA64-LABEL: fcmps_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, 
$fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: 
fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 
@fcmp_ult(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") 
strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll new file mode 100644 index 0000000000000000000000000000000000000000..54dc56784006f1270a1d3003e4740fa27cf63b6f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-fma.ll @@ -0,0 +1,887 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-OFF +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-OFF + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; 
LA64-CONTRACT-ON-LABEL: fmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul float %a, %b + %add = fadd float %mul, %c + ret float %add +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul float %a, %b + %sub = fsub float %mul, %c + ret float %sub +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; 
LA32-CONTRACT-ON-LABEL: fnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fadd.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul float %a, %b + %add = fadd float %mul, %c + %negadd = fneg float %add + ret float %negadd +} + +define float @fnmadd_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; 
+; LA64-CONTRACT-FAST-LABEL: fnmadd_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz float %a + %negc = fneg nsz float %c + %mul = fmul nsz float %nega, %b + %add = fadd nsz float %mul, %negc + ret float %add +} + +;; Check that fnmadd.s is not emitted. +define float @not_fnmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: 
fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a + %negc = fneg float %c + %mul = fmul float %nega, %b + %add = fadd float %mul, %negc + ret float %add +} + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a + %mul = fmul float %nega, %b + %add = fadd float %mul, %c + ret float %add +} + +define float @contract_fmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, 
$fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + ret float %add +} + +define float @contract_fmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; 
LA64-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %sub = fsub contract float %mul, %c + ret float %sub +} + +define float @contract_fnmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + %negadd = fneg contract float %add + ret float %negadd +} + +define float @contract_fnmadd_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmadd_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmadd_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmadd_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; 
LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmadd_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmadd_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmadd_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz float %a + %negc = fneg contract nsz float %c + %mul = fmul contract nsz float %nega, %b + %add = fadd contract nsz float %mul, %negc + ret float %add +} + +;; Check that fnmadd.s is not emitted. +define float @not_contract_fnmadd_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: 
not_contract_fnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract float %a + %negc = fneg contract float %c + %mul = fmul contract float %nega, %b + %add = fadd contract float %mul, %negc + ret float %add +} + +define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract float %a + %mul = fmul contract float %nega, %b + %add = fadd contract float %mul, %c + ret float %add +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_s_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_s_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.s
$fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_s_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_s_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmadd_s_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_s_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %fma = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %fma +} + +define float @fmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_s_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_s_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_s_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_s_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_s_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_s_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg float %c + %fma = call float @llvm.fma.f32(float %a, float %b,
float %negc) + ret float %fma +} + +define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_s_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_s_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_s_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_s_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_s_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_s_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %fma = call float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg float %fma + ret float %neg +} + +define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_s_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_s_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_s_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_s_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0,
$fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_s_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_s_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a + %negc = fneg float %c + %fma = call nsz float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %fma +} + +;; Check that fnmadd.s is not emitted. +define float @not_fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmadd_s_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmadd_s_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmadd_s_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmadd_s_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmadd_s_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmadd_s_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega =
fneg float %a + %negc = fneg float %c + %fma = call float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %fma +} + +define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a + %fma = call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %fma +} + +define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL:
fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negb = fneg float %b + %fma = call float @llvm.fma.f32(float %a, float %negb, float %c) + ret float %fma +} + +define float @fmadd_s_contract(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmadd_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmadd_s_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmadd_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmadd_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmadd_s_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmadd_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + ret float %add +} + +define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fmsub_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0,
$fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fmsub_s_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fmsub_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fmsub_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fmsub_s_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fmsub_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %sub = fsub contract float %mul, %c + ret float %sub +} + +define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmadd_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmadd_s_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmadd_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmadd_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmadd_s_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmadd_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; 
LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + %negadd = fneg contract float %add + ret float %negadd +} + +define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_contract: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_contract: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b + %sub = fsub contract float %c, %mul + ret float %sub +} diff --git a/llvm/test/CodeGen/LoongArch/float-imm.ll b/llvm/test/CodeGen/LoongArch/float-imm.ll index a6b542c29ed73da7e9e04960f1c4c87f04995432..e2cbf4bf9b3e874f0bbd8b7336359c14caca2d25 100644 --- a/llvm/test/CodeGen/LoongArch/float-imm.ll +++ b/llvm/test/CodeGen/LoongArch/float-imm.ll @@ -6,12 +6,12 @@ define float @f32_positive_zero() nounwind { ; LA32-LABEL: f32_positive_zero: ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_positive_zero: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $zero -; 
LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret float 0.0 } @@ -20,30 +20,30 @@ define float @f32_negative_zero() nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $zero ; LA32-NEXT: fneg.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_negative_zero: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $zero ; LA64-NEXT: fneg.s $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret float -0.0 } define float @f32_constant_pi() nounwind { ; LA32-LABEL: f32_constant_pi: ; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, .LCPI2_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI2_0) ; LA32-NEXT: fld.s $fa0, $a0, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_constant_pi: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, .LCPI2_0 -; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0) ; LA64-NEXT: fld.s $fa0, $a0, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret float 3.14159274101257324218750 } @@ -54,7 +54,7 @@ define float @f32_add_fimm1(float %a) nounwind { ; LA32-NEXT: movgr2fr.w $fa1, $a0 ; LA32-NEXT: ffint.s.w $fa1, $fa1 ; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_add_fimm1: ; LA64: # %bb.0: @@ -62,7 +62,7 @@ define float @f32_add_fimm1(float %a) nounwind { ; LA64-NEXT: movgr2fr.w $fa1, $a0 ; LA64-NEXT: ffint.s.w $fa1, $fa1 ; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fadd float %a, 1.0 ret float %1 } @@ -73,13 +73,13 @@ define float @f32_positive_fimm1() nounwind { ; LA32-NEXT: addi.w $a0, $zero, 1 ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.s.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_positive_fimm1: ; LA64: # %bb.0: ; LA64-NEXT: addi.w $a0, $zero, 1 ; LA64-NEXT: 
movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.s.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ret float 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/fp-expand.ll b/llvm/test/CodeGen/LoongArch/fp-expand.ll new file mode 100644 index 0000000000000000000000000000000000000000..522aea33383f61e69be523fc78b648b27f87f90d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-expand.ll @@ -0,0 +1,272 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +;; TODO: Add more test cases after ABI implementation for ilp32f and lp64f. + +declare float @llvm.sin.f32(float) +declare float @llvm.cos.f32(float) +declare float @llvm.pow.f32(float, float) +declare double @llvm.sin.f64(double) +declare double @llvm.cos.f64(double) +declare double @llvm.pow.f64(double, double) + +define float @sin_f32(float %a) nounwind { +; LA32-LABEL: sin_f32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(sinf) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: sin_f32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(sinf) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call float @llvm.sin.f32(float %a) + ret float %1 +} + +define float @cos_f32(float %a) nounwind { +; LA32-LABEL: cos_f32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(cosf) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: cos_f32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, 
$sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(cosf) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call float @llvm.cos.f32(float %a) + ret float %1 +} + +define float @sincos_f32(float %a) nounwind { +; LA32-LABEL: sincos_f32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: fmov.s $fs0, $fa0 +; LA32-NEXT: bl %plt(sinf) +; LA32-NEXT: fmov.s $fs1, $fa0 +; LA32-NEXT: fmov.s $fa0, $fs0 +; LA32-NEXT: bl %plt(cosf) +; LA32-NEXT: fadd.s $fa0, $fs1, $fa0 +; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: sincos_f32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fmov.s $fs0, $fa0 +; LA64-NEXT: bl %plt(sinf) +; LA64-NEXT: fmov.s $fs1, $fa0 +; LA64-NEXT: fmov.s $fa0, $fs0 +; LA64-NEXT: bl %plt(cosf) +; LA64-NEXT: fadd.s $fa0, $fs1, $fa0 +; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + %1 = call float @llvm.sin.f32(float %a) + %2 = call float @llvm.cos.f32(float %a) + %3 = fadd float %1, %2 + ret float %3 +} + +define float @pow_f32(float %a, float %b) nounwind { +; LA32-LABEL: pow_f32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(powf) +; LA32-NEXT: ld.w $ra, $sp, 12 # 
4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: pow_f32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(powf) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call float @llvm.pow.f32(float %a, float %b) + ret float %1 +} + +define float @frem_f32(float %a, float %b) nounwind { +; LA32-LABEL: frem_f32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(fmodf) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: frem_f32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(fmodf) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = frem float %a, %b + ret float %1 +} + +define double @sin_f64(double %a) nounwind { +; LA32-LABEL: sin_f64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(sin) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: sin_f64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(sin) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call double @llvm.sin.f64(double %a) + ret double %1 +} + +define double @cos_f64(double %a) nounwind { +; LA32-LABEL: cos_f64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(cos) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: 
ret +; +; LA64-LABEL: cos_f64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(cos) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call double @llvm.cos.f64(double %a) + ret double %1 +} + +define double @sincos_f64(double %a) nounwind { +; LA32-LABEL: sincos_f64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: fmov.d $fs0, $fa0 +; LA32-NEXT: bl %plt(sin) +; LA32-NEXT: fmov.d $fs1, $fa0 +; LA32-NEXT: fmov.d $fa0, $fs0 +; LA32-NEXT: bl %plt(cos) +; LA32-NEXT: fadd.d $fa0, $fs1, $fa0 +; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: sincos_f64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fmov.d $fs0, $fa0 +; LA64-NEXT: bl %plt(sin) +; LA64-NEXT: fmov.d $fs1, $fa0 +; LA64-NEXT: fmov.d $fa0, $fs0 +; LA64-NEXT: bl %plt(cos) +; LA64-NEXT: fadd.d $fa0, $fs1, $fa0 +; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + %1 = call double @llvm.sin.f64(double %a) + %2 = call double @llvm.cos.f64(double %a) + %3 = fadd double %1, %2 + ret double %3 +} + +define double @pow_f64(double %a, double %b) nounwind { +; LA32-LABEL: pow_f64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 
4-byte Folded Spill +; LA32-NEXT: bl %plt(pow) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: pow_f64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(pow) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call double @llvm.pow.f64(double %a, double %b) + ret double %1 +} + +define double @frem_f64(double %a, double %b) nounwind { +; LA32-LABEL: frem_f64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(fmod) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: frem_f64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(fmod) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = frem double %a, %b + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/fp-max-min.ll new file mode 100644 index 0000000000000000000000000000000000000000..b2ca475b16ab43a3db03ef4379dccacc3b15428a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-max-min.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.maxnum.f64(double, double) +declare float @llvm.minnum.f32(float, float) 
+declare double @llvm.minnum.f64(double, double) + +define float @maxnum_float(float %x, float %y) { +; LA32F-LABEL: maxnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maxnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maxnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maxnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.maxnum.f32(float %x, float %y) + ret float %z +} + +define double @maxnum_double(double %x, double %y) { +; LA32F-LABEL: maxnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmax) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maxnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maxnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maxnum_double: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, 
$fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.maxnum.f64(double %x, double %y) + ret double %z +} + +define float @minnum_float(float %x, float %y) { +; LA32F-LABEL: minnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.minnum.f32(float %x, float %y) + ret float %z +} + +define double @minnum_double(double %x, double %y) { +; LA32F-LABEL: minnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmin) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minnum_double: +; 
LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.minnum.f64(double %x, double %y) + ret double %z +} diff --git a/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll new file mode 100644 index 0000000000000000000000000000000000000000..b858099839cacc4d662650333a3a78b8aade42d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + + +define float @f32_reciprocal(float %a) nounwind { +; LA32F-LABEL: f32_reciprocal: +; LA32F: # %bb.0: +; LA32F-NEXT: frecip.s $fa0, $fa0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: f32_reciprocal: +; LA32D: # %bb.0: +; LA32D-NEXT: frecip.s $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: f32_reciprocal: +; LA64F: # %bb.0: +; LA64F-NEXT: frecip.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: f32_reciprocal: +; LA64D: # %bb.0: +; LA64D-NEXT: frecip.s $fa0, $fa0 +; LA64D-NEXT: ret + %1 = fdiv float 1.0, %a + ret float %1 +} + +define double @f64_reciprocal(double %a) nounwind { +; LA32F-LABEL: f64_reciprocal: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: move $a3, $a1 +; LA32F-NEXT: move $a2, $a0 +; LA32F-NEXT: lu12i.w $a1, 261888 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: bl %plt(__divdf3) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: f64_reciprocal: +; LA32D: # 
%bb.0: +; LA32D-NEXT: frecip.d $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: f64_reciprocal: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: lu52i.d $a0, $zero, 1023 +; LA64F-NEXT: bl %plt(__divdf3) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: f64_reciprocal: +; LA64D: # %bb.0: +; LA64D-NEXT: frecip.d $fa0, $fa0 +; LA64D-NEXT: ret + %1 = fdiv double 1.0, %a + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/fp-trunc-store.ll b/llvm/test/CodeGen/LoongArch/fp-trunc-store.ll new file mode 100644 index 0000000000000000000000000000000000000000..84e52d9d18c726246d61edd6fb35e0871572b97e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-trunc-store.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +define void @fp_trunc(ptr %a, double %b) nounwind { +; LA32F-LABEL: fp_trunc: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a0 +; LA32F-NEXT: move $a0, $a1 +; LA32F-NEXT: move $a1, $a2 +; LA32F-NEXT: bl %plt(__truncdfsf2) +; LA32F-NEXT: fst.s $fa0, $fp, 0 +; LA32F-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fp_trunc: +; LA32D: # %bb.0: +; LA32D-NEXT: fcvt.s.d $fa0, $fa0 +; LA32D-NEXT: fst.s $fa0, $a0, 0 +; LA32D-NEXT: 
ret +; +; LA64F-LABEL: fp_trunc: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: move $a0, $a1 +; LA64F-NEXT: bl %plt(__truncdfsf2) +; LA64F-NEXT: fst.s $fa0, $fp, 0 +; LA64F-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fp_trunc: +; LA64D: # %bb.0: +; LA64D-NEXT: fcvt.s.d $fa0, $fa0 +; LA64D-NEXT: fst.s $fa0, $a0, 0 +; LA64D-NEXT: ret + %1 = fptrunc double %b to float + store float %1, ptr %a, align 4 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll index e0aa7db13f728212d43976516c11026f63d766e7..2a9700522219c30c83b7956cd579f04bd0a2ec5d 100644 --- a/llvm/test/CodeGen/LoongArch/frame.ll +++ b/llvm/test/CodeGen/LoongArch/frame.ll @@ -1,7 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s %struct.key_t = type { i32, [16 x i8] } +declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1) +declare void @test1(ptr) + define i32 @test() nounwind { ; CHECK-LABEL: test: ; CHECK: # %bb.0: @@ -10,13 +14,12 @@ define i32 @test() nounwind { ; CHECK-NEXT: st.w $zero, $sp, 16 ; CHECK-NEXT: st.d $zero, $sp, 8 ; CHECK-NEXT: st.d $zero, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: ori $a0, $a0, 4 -; CHECK-NEXT: bl test1 +; CHECK-NEXT: addi.d $a0, $sp, 4 +; CHECK-NEXT: bl %plt(test1) ; CHECK-NEXT: move $a0, $zero ; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %key = alloca %struct.key_t, align 4 call void @llvm.memset.p0i8.i64(ptr %key, i8 0, i64 20, i1 false) %1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0 @@ -24,6 +27,73 @@ define i32 
@test() nounwind { ret i32 0 } -declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1) +;; Note: will create an emergency spill slot, if (!isInt<11>(StackSize)). +;; Should involve only one SP-adjusting addi per adjustment. +define void @test_large_frame_size_2032() { +; CHECK-LABEL: test_large_frame_size_2032: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2032 +; CHECK-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: ret + %1 = alloca i8, i32 2016 ; + 16(emergency slot) = 2032 + ret void +} -declare void @test1(ptr) +;; Should involve two SP-adjusting addi's when adjusting SP up, but only one +;; when adjusting down. +define void @test_large_frame_size_2048() { +; CHECK-LABEL: test_large_frame_size_2048: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: .cfi_def_cfa_offset 2048 +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = alloca i8, i32 2032 ; + 16(emergency slot) = 2048 + ret void +} + +;; Should involve two SP-adjusting addi's per adjustment. +define void @test_large_frame_size_2064() { +; CHECK-LABEL: test_large_frame_size_2064: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 2064 +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret + %1 = alloca i8, i32 2048 ; + 16(emergency slot) = 2064 + ret void +} + +;; NOTE: Due to the problem with the emegency spill slot, the scratch register +;; will not be used when the fp is eliminated. To make this test valid, add the +;; attribute "frame-pointer=all". + +;; SP should be adjusted with help of a scratch register. 
+define void @test_large_frame_size_1234576() "frame-pointer"="all" { +; CHECK-LABEL: test_large_frame_size_1234576: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2032 +; CHECK-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 2032 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: lu12i.w $a0, 300 +; CHECK-NEXT: ori $a0, $a0, 3760 +; CHECK-NEXT: sub.d $sp, $sp, $a0 +; CHECK-NEXT: lu12i.w $a0, 300 +; CHECK-NEXT: ori $a0, $a0, 3760 +; CHECK-NEXT: add.d $sp, $sp, $a0 +; CHECK-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: ret + %1 = alloca i8, i32 1234567 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/frameaddr-returnaddr.ll b/llvm/test/CodeGen/LoongArch/frameaddr-returnaddr.ll new file mode 100644 index 0000000000000000000000000000000000000000..01c9173c2e982aa0a95b80b9102842589dd81b7d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/frameaddr-returnaddr.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +declare ptr @llvm.frameaddress(i32) +declare ptr @llvm.returnaddress(i32) + +define ptr @test_frameaddress_0() nounwind { +; LA32-LABEL: test_frameaddress_0: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 16 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: 
test_frameaddress_0: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-NEXT: addi.d $fp, $sp, 16 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call ptr @llvm.frameaddress(i32 0) + ret ptr %1 +} + +define ptr @test_frameaddress_2() nounwind { +; LA32-LABEL: test_frameaddress_2: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 16 +; LA32-NEXT: ld.w $a0, $fp, -8 +; LA32-NEXT: ld.w $a0, $a0, -8 +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: test_frameaddress_2: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-NEXT: addi.d $fp, $sp, 16 +; LA64-NEXT: ld.d $a0, $fp, -16 +; LA64-NEXT: ld.d $a0, $a0, -16 +; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = call ptr @llvm.frameaddress(i32 2) + ret ptr %1 +} + +define ptr @test_returnaddress_0() nounwind { +; LA32-LABEL: test_returnaddress_0: +; LA32: # %bb.0: +; LA32-NEXT: move $a0, $ra +; LA32-NEXT: ret +; +; LA64-LABEL: test_returnaddress_0: +; LA64: # %bb.0: +; LA64-NEXT: move $a0, $ra +; LA64-NEXT: ret + %1 = call ptr @llvm.returnaddress(i32 0) + ret ptr %1 +} diff --git a/llvm/test/CodeGen/LoongArch/frint.ll b/llvm/test/CodeGen/LoongArch/frint.ll new file mode 100644 index 0000000000000000000000000000000000000000..e7fa8d913bbe7801ed819d94b58a4b7173dc5abe --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/frint.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +define float @rint_f32(float %f) nounwind { +; LA32F-LABEL: rint_f32: +; LA32F: # %bb.0: # %entry +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: bl %plt(rintf) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: rint_f32: +; LA32D: # %bb.0: # %entry +; LA32D-NEXT: addi.w $sp, $sp, -16 +; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32D-NEXT: bl %plt(rintf) +; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32D-NEXT: addi.w $sp, $sp, 16 +; LA32D-NEXT: ret +; +; LA64F-LABEL: rint_f32: +; LA64F: # %bb.0: # %entry +; LA64F-NEXT: frint.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: rint_f32: +; LA64D: # %bb.0: # %entry +; LA64D-NEXT: frint.s $fa0, $fa0 +; LA64D-NEXT: ret +entry: + %0 = tail call float @llvm.rint.f32(float %f) + ret float %0 +} + +declare float @llvm.rint.f32(float) + +define double @rint_f64(double %d) nounwind { +; LA32F-LABEL: rint_f64: +; LA32F: # %bb.0: # %entry +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: bl %plt(rint) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: rint_f64: +; LA32D: # %bb.0: # %entry +; LA32D-NEXT: addi.w $sp, $sp, -16 +; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32D-NEXT: bl %plt(rint) +; LA32D-NEXT: ld.w $ra, $sp, 12 # 
4-byte Folded Reload +; LA32D-NEXT: addi.w $sp, $sp, 16 +; LA32D-NEXT: ret +; +; LA64F-LABEL: rint_f64: +; LA64F: # %bb.0: # %entry +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(rint) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: rint_f64: +; LA64D: # %bb.0: # %entry +; LA64D-NEXT: frint.d $fa0, $fa0 +; LA64D-NEXT: ret +entry: + %0 = tail call double @llvm.rint.f64(double %d) + ret double %0 +} + +declare double @llvm.rint.f64(double) diff --git a/llvm/test/CodeGen/LoongArch/fsqrt.ll b/llvm/test/CodeGen/LoongArch/fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..776de7f729ec4302575ecdb663570c5559d5995f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fsqrt.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) + +define float @fsqrt_f32(float %a) nounwind { +; LA32F-LABEL: fsqrt_f32: +; LA32F: # %bb.0: +; LA32F-NEXT: fsqrt.s $fa0, $fa0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fsqrt_f32: +; LA32D: # %bb.0: +; LA32D-NEXT: fsqrt.s $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fsqrt_f32: +; LA64F: # %bb.0: +; LA64F-NEXT: fsqrt.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fsqrt_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fsqrt.s $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +define double @fsqrt_f64(double %a) nounwind { +; LA32F-LABEL: fsqrt_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: 
addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: bl %plt(sqrt) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fsqrt_f64: +; LA32D: # %bb.0: +; LA32D-NEXT: fsqrt.d $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fsqrt_f64: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(sqrt) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fsqrt_f64: +; LA64D: # %bb.0: +; LA64D-NEXT: fsqrt.d $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call double @llvm.sqrt.f64(double %a) + ret double %1 +} + +define float @frsqrt_f32(float %a) nounwind { +; LA32F-LABEL: frsqrt_f32: +; LA32F: # %bb.0: +; LA32F-NEXT: frsqrt.s $fa0, $fa0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: frsqrt_f32: +; LA32D: # %bb.0: +; LA32D-NEXT: frsqrt.s $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: frsqrt_f32: +; LA64F: # %bb.0: +; LA64F-NEXT: frsqrt.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: frsqrt_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: frsqrt.s $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + %2 = fdiv float 1.0, %1 + ret float %2 +} + +define double @frsqrt_f64(double %a) nounwind { +; LA32F-LABEL: frsqrt_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: bl %plt(sqrt) +; LA32F-NEXT: move $a2, $a0 +; LA32F-NEXT: move $a3, $a1 +; LA32F-NEXT: lu12i.w $a1, 261888 +; LA32F-NEXT: move $a0, $zero +; LA32F-NEXT: bl %plt(__divdf3) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: frsqrt_f64: +; LA32D: # %bb.0: +; LA32D-NEXT: frsqrt.d $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: frsqrt_f64: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: 
st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(sqrt) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: lu52i.d $a0, $zero, 1023 +; LA64F-NEXT: bl %plt(__divdf3) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: frsqrt_f64: +; LA64D: # %bb.0: +; LA64D-NEXT: frsqrt.d $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call double @llvm.sqrt.f64(double %a) + %2 = fdiv double 1.0, %1 + ret double %2 +} diff --git a/llvm/test/CodeGen/LoongArch/get-reg-error-la32.ll b/llvm/test/CodeGen/LoongArch/get-reg-error-la32.ll new file mode 100644 index 0000000000000000000000000000000000000000..7440bfe5c85a5b1639b64f447c989a4d0c4fe21b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/get-reg-error-la32.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc < %s --mtriple=loongarch32 2>&1 | FileCheck %s + +define i64 @read_sp() nounwind { +entry: +; CHECK: On LA32, only 32-bit registers can be read. + %a1 = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %a1 +} + +define void @write_sp(i64 %val) nounwind { +entry: +; CHECK: On LA32, only 32-bit registers can be written. + call void @llvm.write_register.i64(metadata !0, i64 %val) + ret void +} + +declare i64 @llvm.read_register.i64(metadata) nounwind +declare void @llvm.write_register.i64(metadata, i64) nounwind + +!0 = !{!"$sp\00"} diff --git a/llvm/test/CodeGen/LoongArch/get-reg-error-la64.ll b/llvm/test/CodeGen/LoongArch/get-reg-error-la64.ll new file mode 100644 index 0000000000000000000000000000000000000000..9312aa9020ba945647b3563121fbafe3142c305f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/get-reg-error-la64.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s + +define i32 @read_sp() nounwind { +entry: +; CHECK: On LA64, only 64-bit registers can be read. 
+ %a1 = call i32 @llvm.read_register.i32(metadata !0) + ret i32 %a1 +} + +define void @write_sp(i32 %val) nounwind { +entry: +; CHECK: On LA64, only 64-bit registers can be written. + call void @llvm.write_register.i32(metadata !0, i32 %val) + ret void +} + +declare i32 @llvm.read_register.i32(metadata) nounwind +declare void @llvm.write_register.i32(metadata, i32) nounwind + +!0 = !{!"$sp\00"} diff --git a/llvm/test/CodeGen/LoongArch/get-reg.ll b/llvm/test/CodeGen/LoongArch/get-reg.ll new file mode 100644 index 0000000000000000000000000000000000000000..323030da9e7f336859531c06f5550402da865d1e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/get-reg.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s --mtriple=loongarch64 | FileCheck %s + +define i64 @get_stack() nounwind { +; CHECK-LABEL: get_stack: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $sp +; CHECK-NEXT: ret +entry: + %sp = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %sp +} + +define void @set_stack(i64 %val) nounwind { +; CHECK-LABEL: set_stack: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $sp, $a0 +; CHECK-NEXT: ret +entry: + call void @llvm.write_register.i64(metadata !0, i64 %val) + ret void +} + +declare i64 @llvm.read_register.i64(metadata) nounwind +declare void @llvm.write_register.i64(metadata, i64) nounwind + +!0 = !{!"$sp\00"} diff --git a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll new file mode 100644 index 0000000000000000000000000000000000000000..34a5102b4dde17c3d66e34f2a69530a53640fa78 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define void @getSetCCResultType(ptr %p) { +; CHECK-LABEL: getSetCCResultType: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: ld.wu $a1, $a0, 12 +; CHECK-NEXT: sltui $a1, $a1, 1 +; CHECK-NEXT: sub.d $a1, $zero, $a1 +; CHECK-NEXT: st.w $a1, $a0, 12 +; CHECK-NEXT: ld.wu $a1, $a0, 8 +; CHECK-NEXT: sltui $a1, $a1, 1 +; CHECK-NEXT: sub.d $a1, $zero, $a1 +; CHECK-NEXT: st.w $a1, $a0, 8 +; CHECK-NEXT: ld.wu $a1, $a0, 4 +; CHECK-NEXT: sltui $a1, $a1, 1 +; CHECK-NEXT: sub.d $a1, $zero, $a1 +; CHECK-NEXT: st.w $a1, $a0, 4 +; CHECK-NEXT: ld.wu $a1, $a0, 0 +; CHECK-NEXT: sltui $a1, $a1, 1 +; CHECK-NEXT: sub.d $a1, $zero, $a1 +; CHECK-NEXT: st.w $a1, $a0, 0 +; CHECK-NEXT: ret +entry: + %0 = load <4 x i32>, ptr %p, align 16 + %cmp = icmp eq <4 x i32> %0, zeroinitializer + %sext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %sext, ptr %p, align 16 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ghc-cc.ll b/llvm/test/CodeGen/LoongArch/ghc-cc.ll new file mode 100644 index 0000000000000000000000000000000000000000..0ab125e875b9964dc46e89d99b5ee3c20590d82e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ghc-cc.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+f,+d < %s | FileCheck %s --check-prefix=LA64 + +; Check the GHC call convention works (la64) + +@base = external dso_local global i64 ; assigned to register: s0 +@sp = external dso_local global i64 ; assigned to register: s1 +@hp = external dso_local global i64 ; assigned to register: s2 +@r1 = external dso_local global i64 ; assigned to register: s3 +@r2 = external dso_local global i64 ; assigned to register: s4 +@r3 = external dso_local global i64 ; assigned to register: s5 +@r4 = external dso_local global i64 ; assigned to register: s6 +@r5 = external dso_local global i64 ; assigned to register: s7 +@splim = external dso_local global i64 ; assigned to register: s8 + +@f1 = external dso_local global float ; assigned to register: fs0 +@f2 = external dso_local global float ; assigned to register: fs1 +@f3 = external dso_local global 
float ; assigned to register: fs2 +@f4 = external dso_local global float ; assigned to register: fs3 + +@d1 = external dso_local global double ; assigned to register: fs4 +@d2 = external dso_local global double ; assigned to register: fs5 +@d3 = external dso_local global double ; assigned to register: fs6 +@d4 = external dso_local global double ; assigned to register: fs7 + +define ghccc void @foo() nounwind { +; LA64-LABEL: foo: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %pc_hi20(base) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(base) +; LA64-NEXT: ld.d $s0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(sp) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(sp) +; LA64-NEXT: ld.d $s1, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(hp) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(hp) +; LA64-NEXT: ld.d $s2, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(r1) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(r1) +; LA64-NEXT: ld.d $s3, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(r2) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(r2) +; LA64-NEXT: ld.d $s4, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(r3) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(r3) +; LA64-NEXT: ld.d $s5, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(r4) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(r4) +; LA64-NEXT: ld.d $s6, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(r5) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(r5) +; LA64-NEXT: ld.d $s7, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(splim) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(splim) +; LA64-NEXT: ld.d $s8, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(f1) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(f1) +; LA64-NEXT: fld.s $fs0, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(f2) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(f2) +; LA64-NEXT: fld.s $fs1, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(f3) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(f3) +; LA64-NEXT: fld.s $fs2, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(f4) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(f4) +; LA64-NEXT: fld.s $fs3, $a0, 0 
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(d1) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(d1) +; LA64-NEXT: fld.d $fs4, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(d2) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(d2) +; LA64-NEXT: fld.d $fs5, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(d3) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(d3) +; LA64-NEXT: fld.d $fs6, $a0, 0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(d4) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(d4) +; LA64-NEXT: fld.d $fs7, $a0, 0 +; LA64-NEXT: b %plt(bar) + +entry: + %0 = load double, ptr @d4 + %1 = load double, ptr @d3 + %2 = load double, ptr @d2 + %3 = load double, ptr @d1 + %4 = load float, ptr @f4 + %5 = load float, ptr @f3 + %6 = load float, ptr @f2 + %7 = load float, ptr @f1 + %8 = load i64, ptr @splim + %9 = load i64, ptr @r5 + %10 = load i64, ptr @r4 + %11 = load i64, ptr @r3 + %12 = load i64, ptr @r2 + %13 = load i64, ptr @r1 + %14 = load i64, ptr @hp + %15 = load i64, ptr @sp + %16 = load i64, ptr @base + tail call ghccc void @bar(i64 %16, i64 %15, i64 %14, i64 %13, i64 %12, + i64 %11, i64 %10, i64 %9, i64 %8, float %7, float %6, + float %5, float %4, double %3, double %2, double %1, double %0) nounwind + ret void +} +declare ghccc void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, + float, float, float, float, + double, double, double, double) diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll new file mode 100644 index 0000000000000000000000000000000000000000..258c4e8691567fed4fc0bdbb5ec32ea51f6285ae --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/global-address.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --relocation-model=static < %s | FileCheck %s --check-prefix=LA32NOPIC +; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC +; RUN: llc --mtriple=loongarch64 --relocation-model=static < %s | FileCheck %s 
--check-prefix=LA64NOPIC +; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC + +@g = dso_local global i32 zeroinitializer, align 4 +@G = global i32 zeroinitializer, align 4 + +define void @foo() nounwind { +; LA32NOPIC-LABEL: foo: +; LA32NOPIC: # %bb.0: +; LA32NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA32NOPIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G) +; LA32NOPIC-NEXT: ld.w $a0, $a0, 0 +; LA32NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) +; LA32NOPIC-NEXT: addi.w $a0, $a0, %pc_lo12(g) +; LA32NOPIC-NEXT: ld.w $a0, $a0, 0 +; LA32NOPIC-NEXT: ret +; +; LA32PIC-LABEL: foo: +; LA32PIC: # %bb.0: +; LA32PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA32PIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G) +; LA32PIC-NEXT: ld.w $a0, $a0, 0 +; LA32PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) +; LA32PIC-NEXT: addi.w $a0, $a0, %pc_lo12(.Lg$local) +; LA32PIC-NEXT: ld.w $a0, $a0, 0 +; LA32PIC-NEXT: ret +; +; LA64NOPIC-LABEL: foo: +; LA64NOPIC: # %bb.0: +; LA64NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA64NOPIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; LA64NOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) +; LA64NOPIC-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; LA64NOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64NOPIC-NEXT: ret +; +; LA64PIC-LABEL: foo: +; LA64PIC: # %bb.0: +; LA64PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA64PIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; LA64PIC-NEXT: ld.w $a0, $a0, 0 +; LA64PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) +; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local) +; LA64PIC-NEXT: ld.w $a0, $a0, 0 +; LA64PIC-NEXT: ret + %V = load volatile i32, ptr @G + %v = load volatile i32, ptr @g + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/imm.ll b/llvm/test/CodeGen/LoongArch/imm.ll index fb0dcf21f231e37962bc7140448d739aa1824f85..f8b7a61d60973f40b5a8351ddaeabb69f98e376b 100644 --- a/llvm/test/CodeGen/LoongArch/imm.ll +++ b/llvm/test/CodeGen/LoongArch/imm.ll @@ -1,10 +1,11 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s define i64 @imm0() { ; CHECK-LABEL: imm0: ; CHECK: # %bb.0: ; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 0 } @@ -12,7 +13,7 @@ define i64 @imm7ff0000000000000() { ; CHECK-LABEL: imm7ff0000000000000: ; CHECK: # %bb.0: ; CHECK-NEXT: lu52i.d $a0, $zero, 2047 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 9218868437227405312 } @@ -20,7 +21,7 @@ define i64 @imm0000000000000fff() { ; CHECK-LABEL: imm0000000000000fff: ; CHECK: # %bb.0: ; CHECK-NEXT: ori $a0, $zero, 4095 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 4095 } @@ -29,7 +30,7 @@ define i64 @imm0007ffff00000800() { ; CHECK: # %bb.0: ; CHECK-NEXT: ori $a0, $zero, 2048 ; CHECK-NEXT: lu32i.d $a0, 524287 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2251795518720000 } @@ -38,7 +39,7 @@ define i64 @immfff0000000000fff() { ; CHECK: # %bb.0: ; CHECK-NEXT: ori $a0, $zero, 4095 ; CHECK-NEXT: lu52i.d $a0, $a0, -1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 -4503599627366401 } @@ -48,7 +49,7 @@ define i64 @imm0008000000000fff() { ; CHECK-NEXT: ori $a0, $zero, 4095 ; CHECK-NEXT: lu32i.d $a0, -524288 ; CHECK-NEXT: lu52i.d $a0, $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2251799813689343 } @@ -56,7 +57,7 @@ define i64 @immfffffffffffff800() { ; CHECK-LABEL: immfffffffffffff800: ; CHECK: # %bb.0: ; CHECK-NEXT: addi.w $a0, $zero, -2048 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 -2048 } @@ -65,7 +66,7 @@ define i64 @imm00000000fffff800() { ; CHECK: # %bb.0: ; CHECK-NEXT: addi.w $a0, $zero, -2048 ; CHECK-NEXT: lu32i.d $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 4294965248 } @@ -74,7 +75,7 @@ define i64 @imm000ffffffffff800() { ; CHECK: # %bb.0: ; CHECK-NEXT: addi.w $a0, $zero, -2048 ; CHECK-NEXT: lu52i.d $a0, $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; 
CHECK-NEXT: ret ret i64 4503599627368448 } @@ -84,7 +85,7 @@ define i64 @imm00080000fffff800() { ; CHECK-NEXT: addi.w $a0, $zero, -2048 ; CHECK-NEXT: lu32i.d $a0, -524288 ; CHECK-NEXT: lu52i.d $a0, $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2251804108650496 } @@ -92,7 +93,7 @@ define i64 @imm000000007ffff000() { ; CHECK-LABEL: imm000000007ffff000: ; CHECK: # %bb.0: ; CHECK-NEXT: lu12i.w $a0, 524287 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2147479552 } @@ -101,7 +102,7 @@ define i64 @imm0000000080000000() { ; CHECK: # %bb.0: ; CHECK-NEXT: lu12i.w $a0, -524288 ; CHECK-NEXT: lu32i.d $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2147483648 } @@ -110,7 +111,7 @@ define i64 @imm000ffffffffff000() { ; CHECK: # %bb.0: ; CHECK-NEXT: lu12i.w $a0, -1 ; CHECK-NEXT: lu52i.d $a0, $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 4503599627366400 } @@ -120,7 +121,7 @@ define i64 @imm7ff0000080000000() { ; CHECK-NEXT: lu12i.w $a0, -524288 ; CHECK-NEXT: lu32i.d $a0, 0 ; CHECK-NEXT: lu52i.d $a0, $a0, 2047 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 9218868439374888960 } @@ -129,7 +130,7 @@ define i64 @immffffffff80000800() { ; CHECK: # %bb.0: ; CHECK-NEXT: lu12i.w $a0, -524288 ; CHECK-NEXT: ori $a0, $a0, 2048 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 -2147481600 } @@ -139,7 +140,7 @@ define i64 @immffffffff7ffff800() { ; CHECK-NEXT: lu12i.w $a0, 524287 ; CHECK-NEXT: ori $a0, $a0, 2048 ; CHECK-NEXT: lu32i.d $a0, -1 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 -2147485696 } @@ -149,7 +150,7 @@ define i64 @imm7fffffff800007ff() { ; CHECK-NEXT: lu12i.w $a0, -524288 ; CHECK-NEXT: ori $a0, $a0, 2047 ; CHECK-NEXT: lu52i.d $a0, $a0, 2047 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 9223372034707294207 } @@ -160,6 +161,6 @@ define i64 @imm0008000080000800() { ; CHECK-NEXT: ori $a0, $a0, 2048 ; CHECK-NEXT: lu32i.d $a0, -524288 ; CHECK-NEXT: 
lu52i.d $a0, $a0, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret ret i64 2251801961170944 } diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir new file mode 100644 index 0000000000000000000000000000000000000000..fa5fccb1a5ba188552fe85405683f7469a412e19 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir @@ -0,0 +1,33 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s + +## Check that fcc register clobbered by inlineasm is correctly saved by examining +## that a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and +## after the INLINEASM. +... +--- +name: test +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $f0_64, $f1_64 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $f0_64, $f1_64 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64 + ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]] + ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0) + ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 + ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) + ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]] + ; CHECK-NEXT: PseudoRET implicit killed $r4 + %1:fpr64 = COPY $f1_64 + %0:fpr64 = COPY $f0_64 + %2:cfr = FCMP_CLT_D %1, %0 + INLINEASM &"nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 + $r4 = COPY %2 + PseudoRET implicit killed $r4 + +...
diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers.ll b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers.ll new file mode 100644 index 0000000000000000000000000000000000000000..f7e460e4754538351d247a10a64cc1f087949e0d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d --target-abi=ilp32d --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA64 %s + +;; Check that callee-saved registers clobbered by inlineasm are correctly saved. +;; +;; $r23: $s0 (callee-saved register under all ABIs) +;; $r24: $s1 (callee-saved register under all ABIs) +;; $f24: $fs0 (callee-saved register under *d/*f ABIs) +;; $f25: $fs1 (callee-saved register under *d/*f ABIs) + +;; TODO: test other ABIs. 
+ +define void @test() nounwind { +; LA32-LABEL: test: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $s0, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: test: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 0 # 8-byte Folded Spill +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs1, $sp, 0 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + tail call void asm sideeffect "", "~{$f24},~{$f25},~{$r23},~{$r24}"() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZB.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZB.ll new file mode 100644 index 0000000000000000000000000000000000000000..1a8f50abb6588d9075470224071cb7f50b2b5f5b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZB.ll @@ -0,0 +1,62 @@ +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=ASM +; RUN: llc --mtriple=loongarch64 --print-after-isel -o /dev/null 2>&1 < %s \ +; RUN: | FileCheck %s --check-prefix=MACHINE-INSTR + +;; Note amswap.w is not available on loongarch32. 
+ +define void @ZB(ptr %p) nounwind { +; ASM-LABEL: ZB: +; ASM: # %bb.0: +; ASM-NEXT: #APP +; ASM-NEXT: amswap.w $t0, $t1, $a0 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret +;; Make sure machine instr with this "ZB" constraint is printed correctly. +; MACHINE-INSTR: INLINEASM{{.*}}[mem:ZB] + call void asm "amswap.w $$r12, $$r13, $0", "*^ZB"(ptr elementtype(i32) %p) + ret void +} + +define void @ZB_constant_offset(ptr %p) nounwind { +; ASM-LABEL: ZB_constant_offset: +; ASM: # %bb.0: +; ASM-NEXT: addi.d $a0, $a0, 1 +; ASM-NEXT: #APP +; ASM-NEXT: amswap.w $t0, $t1, $a0 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1 +;; Make sure machine instr with this "ZB" constraint is printed correctly. +; MACHINE-INSTR: INLINEASM{{.*}}[mem:ZB] + call void asm "amswap.w $$r12, $$r13, $0", "*^ZB"(ptr elementtype(i32) %1) + ret void +} + +define void @ZB_variable_offset(ptr %p, i32 signext %idx) nounwind { +; ASM-LABEL: ZB_variable_offset: +; ASM: # %bb.0: +; ASM-NEXT: add.d $a0, $a0, $a1 +; ASM-NEXT: #APP +; ASM-NEXT: amswap.w $t0, $t1, $a0 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 %idx +;; Make sure machine instr with this "ZB" constraint is printed correctly. +; MACHINE-INSTR: INLINEASM{{.*}}[mem:ZB] + call void asm "amswap.w $$r12, $$r13, $0", "*^ZB"(ptr elementtype(i32) %1) + ret void +} + +define void @ZB_Input_Output(ptr %p) nounwind { +; ASM-LABEL: ZB_Input_Output: +; ASM: # %bb.0: +; ASM-NEXT: #APP +; ASM-NEXT: amadd_db.d $zero, $t1, $a0 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret +;; Make sure machine instr with this "ZB" constraint is printed correctly. 
+; MACHINE-INSTR: INLINEASM{{.*}}[mem:ZB], %0:gpr, 0 + call void asm "amadd_db.d $$zero, $$r13, $0", "=*^ZB,*^ZB,~{memory}"(ptr elementtype(i64) %p, ptr elementtype(i64) %p) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZC.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZC.ll new file mode 100644 index 0000000000000000000000000000000000000000..9c053c4d24857eb5a271df346c8888ef02ada09a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-ZC.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64 + +define i32 @ZC_offset_neg_32769(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_neg_32769: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, -9 +; LA32-NEXT: ori $a1, $a1, 4095 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_neg_32769: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, -9 +; LA64-NEXT: ori $a1, $a1, 4095 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -32769 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_neg_32768(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_neg_32768: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, -32768 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_neg_32768: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, -32768 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -32768 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_neg_4(ptr 
%p) nounwind { +; LA32-LABEL: ZC_offset_neg_4: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, -4 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_neg_4: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, -4 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -4 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_neg_1(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_neg_1: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a0, $a0, -1 +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_neg_1: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $a0, $a0, -1 +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -1 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_0(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_0: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_0: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %p) + ret i32 %1 +} + +define i32 @ZC_offset_1(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_1: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a0, $a0, 1 +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_1: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $a0, $a0, 1 +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_32764(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_32764: +; LA32: # %bb.0: 
+; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 32764 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_32764: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 32764 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 32764 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @ZC_offset_32767(ptr %p) nounwind { +; LA32-LABEL: ZC_offset_32767: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 7 +; LA32-NEXT: ori $a1, $a1, 4095 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: #APP +; LA32-NEXT: ll.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: ZC_offset_32767: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 7 +; LA64-NEXT: ori $a1, $a1, 4095 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: #APP +; LA64-NEXT: ll.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 32767 + %2 = call i32 asm "ll.w $0, $1", "=r,*^ZC"(ptr elementtype(i32) %1) + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..570fd438be97bf100d45b77695ff058dcc30ad26 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll @@ -0,0 +1,46 @@ +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s +; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +define void @constraint_l() { +; CHECK: error: value out of range for constraint 'l' + tail call void asm sideeffect "lu12i.w $$a0, $0", "l"(i32 32768) +; CHECK: error: value out of range for constraint 'l' + tail call void asm sideeffect "lu12i.w $$a0, $0", "l"(i32 -32769) + ret void +} + +define void @constraint_I() { +; CHECK: error: value out of range for constraint 'I' + tail call void asm sideeffect "addi.w $$a0, $$a0, $0", "I"(i32 2048) +; CHECK: error: value out of range for 
constraint 'I' + tail call void asm sideeffect "addi.w $$a0, $$a0, $0", "I"(i32 -2049) + ret void +} + +define void @constraint_J() { +; CHECK: error: value out of range for constraint 'J' + tail call void asm sideeffect "addi.w $$a0, $$a0, $$0", "J"(i32 1) + ret void +} + +define void @constraint_K() { +; CHECK: error: value out of range for constraint 'K' + tail call void asm sideeffect "andi.w $$a0, $$a0, $0", "K"(i32 4096) +; CHECK: error: value out of range for constraint 'K' + tail call void asm sideeffect "andi.w $$a0, $$a0, $0", "K"(i32 -1) + ret void +} + +define void @constraint_f() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'f' + tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(float 0.0) +; CHECK: error: couldn't allocate input reg for constraint 'f' + tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(double 0.0) + ret void +} + +define void @constraint_r_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'r' + tail call void asm "add.w $$a0, $$a0, $0", "r"(<4 x i32> zeroinitializer) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll new file mode 100644 index 0000000000000000000000000000000000000000..fa675e4bbb32434be437e894c84577b37637310a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d --target-abi=ilp32d --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA64 %s + +@gd = external dso_local global double + +define double @constraint_f_double(double %a) nounwind { +; LA32-LABEL: constraint_f_double: +; LA32: # %bb.0: +; LA32-NEXT: pcalau12i $a0, %pc_hi20(gd) +; LA32-NEXT: addi.w $a0, $a0, 
%pc_lo12(gd) +; LA32-NEXT: fld.d $fa1, $a0, 0 +; LA32-NEXT: #APP +; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: constraint_f_double: +; LA64: # %bb.0: +; LA64-NEXT: pcalau12i $a0, %pc_hi20(gd) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(gd) +; LA64-NEXT: fld.d $fa1, $a0, 0 +; LA64-NEXT: #APP +; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm "fadd.d $0, $1, $2", "=f,f,f"(double %a, double %1) + ret double %2 +} + +define double @constraint_gpr(double %a) { +; LA32-LABEL: constraint_gpr: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: fst.d $fa0, $sp, 8 +; LA32-NEXT: ld.w $a7, $sp, 8 +; LA32-NEXT: ld.w $t0, $sp, 12 +; LA32-NEXT: #APP +; LA32-NEXT: move $a6, $a7 +; LA32-NEXT: #NO_APP +; LA32-NEXT: st.w $a7, $sp, 4 +; LA32-NEXT: st.w $a6, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: constraint_gpr: +; LA64: # %bb.0: +; LA64-NEXT: .cfi_def_cfa_offset 0 +; LA64-NEXT: movfr2gr.d $a7, $fa0 +; LA64-NEXT: #APP +; LA64-NEXT: move $a6, $a7 +; LA64-NEXT: #NO_APP +; LA64-NEXT: movgr2fr.d $fa0, $a6 +; LA64-NEXT: ret + %1 = tail call double asm sideeffect alignstack "move $0, $1", "={$r10},{$r11}"(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-k.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-k.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ffe4b48c3f542c80b4e43ca0adf967dff400392 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-k.ll @@ -0,0 +1,33 @@ +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=ASM +; RUN: llc --mtriple=loongarch64 --print-after-isel -o /dev/null 2>&1 < %s \ +; RUN: | FileCheck %s --check-prefix=MACHINE-INSTR + +define i64 @k_variable_offset(ptr %p, i64 %idx) 
nounwind { +; ASM-LABEL: k_variable_offset: +; ASM: # %bb.0: +; ASM-NEXT: #APP +; ASM-NEXT: ldx.d $a0, $a0, $a1 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i64 %idx +;; Make sure machine instr with this 'k' constraint is printed correctly. +; MACHINE-INSTR: INLINEASM{{.*}}[mem:k] + %2 = call i64 asm "ldx.d $0, $1", "=r,*k"(ptr elementtype(i64) %1) + ret i64 %2 +} + +define i64 @k_constant_offset(ptr %p) nounwind { +; ASM-LABEL: k_constant_offset: +; ASM: # %bb.0: +; ASM-NEXT: ori $a1, $zero, 5 +; ASM-NEXT: #APP +; ASM-NEXT: ldx.d $a0, $a0, $a1 +; ASM-NEXT: #NO_APP +; ASM-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i64 5 +;; Make sure machine instr with this 'k' constraint is printed correctly. +; MACHINE-INSTR: INLINEASM{{.*}}[mem:k] + %2 = call i64 asm "ldx.d $0, $1", "=r,*k"(ptr elementtype(i64) %1) + ret i64 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-m.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-m.ll new file mode 100644 index 0000000000000000000000000000000000000000..129368aee1d31f22beaa0a8c13c81a488c62b0c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-m.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64 + +define i32 @m_offset_neg_2049(ptr %p) nounwind { +; LA32-LABEL: m_offset_neg_2049: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, -1 +; LA32-NEXT: ori $a1, $a1, 2047 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_neg_2049: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, -1 +; LA64-NEXT: ori $a1, $a1, 2047 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; 
LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -2049 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @m_offset_neg_2048(ptr %p) nounwind { +; LA32-LABEL: m_offset_neg_2048: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, -2048 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_neg_2048: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, -2048 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -2048 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @m_offset_neg_1(ptr %p) nounwind { +; LA32-LABEL: m_offset_neg_1: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, -1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_neg_1: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, -1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 -1 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @m_offset_0(ptr %p) nounwind { +; LA32-LABEL: m_offset_0: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_0: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %p) + ret i32 %1 +} + +define i32 @m_offset_1(ptr %p) nounwind { +; LA32-LABEL: m_offset_1: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_1: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @m_offset_2047(ptr %p) nounwind { 
+; LA32-LABEL: m_offset_2047: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, 2047 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_2047: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, 2047 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 2047 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i32 @m_offset_2048(ptr %p) nounwind { +; LA32-LABEL: m_offset_2048: +; LA32: # %bb.0: +; LA32-NEXT: ori $a1, $zero, 2048 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: #APP +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: m_offset_2048: +; LA64: # %bb.0: +; LA64-NEXT: ori $a1, $zero, 2048 +; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: #APP +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 2048 + %2 = call i32 asm "ld.w $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint.ll new file mode 100644 index 0000000000000000000000000000000000000000..4b63d3b0a0a2c3f009b9fd9a7a73b0830e40a20d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs --no-integrated-as < %s \ +; RUN: | FileCheck %s +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs --no-integrated-as < %s \ +; RUN: | FileCheck %s + +@gi = external dso_local global i32, align 4 + +define i32 @constraint_r(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: constraint_r: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: add.w $a0, $a0, $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + %1 = tail call i32 asm "add.w $0, $1, $2", "=r,r,r"(i32 %a, i32 %b) + ret i32 %1 +} + +define i32 @constraint_i(i32 
%a) nounwind { +; CHECK-LABEL: constraint_i: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $a0, 113 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, $2", "=r,r,i"(i32 %a, i32 113) + ret i32 %1 +} + +define void @constraint_l() nounwind { +; CHECK-LABEL: constraint_l: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: lu12i.w $a0, 32767 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: lu12i.w $a0, -32768 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + tail call void asm sideeffect "lu12i.w $$a0, $0", "l"(i32 32767) + tail call void asm sideeffect "lu12i.w $$a0, $0", "l"(i32 -32768) + ret void +} + +define void @constraint_I() nounwind { +; CHECK-LABEL: constraint_I: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $a0, 2047 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $a0, -2048 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + tail call void asm sideeffect "addi.w $$a0, $$a0, $0", "I"(i32 2047) + tail call void asm sideeffect "addi.w $$a0, $$a0, $0", "I"(i32 -2048) + ret void +} + +define void @constraint_J() nounwind { +; CHECK-LABEL: constraint_J: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $a0, 0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + tail call void asm sideeffect "addi.w $$a0, $$a0, $0", "J"(i32 0) + ret void +} + +define void @constraint_K() nounwind { +; CHECK-LABEL: constraint_K: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: andi $a0, $a0, 4095 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: andi $a0, $a0, 0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + tail call void asm sideeffect "andi $$a0, $$a0, $0", "K"(i32 4095) + tail call void asm sideeffect "andi $$a0, $$a0, $0", "K"(i32 0) + ret void +} + +define void @operand_global() nounwind { +; CHECK-LABEL: operand_global: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: .8byte gi +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + tail call void asm sideeffect ".8byte $0", 
"i"(ptr @gi) + ret void +} + +define void @operand_block_address() nounwind { +; CHECK-LABEL: operand_block_address: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: b .Ltmp0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: ret + call void asm sideeffect "b $0", "i"(i8* blockaddress(@operand_block_address, %bb)) + br label %bb +bb: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-operand-modifiers.ll b/llvm/test/CodeGen/LoongArch/inline-asm-operand-modifiers.ll new file mode 100644 index 0000000000000000000000000000000000000000..d3cf288bfd010b246574ea7020c03a53a65a7e01 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-operand-modifiers.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +define i32 @modifier_z_zero(i32 %a) nounwind { +; CHECK-LABEL: modifier_z_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: add.w $a0, $a0, $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + %1 = tail call i32 asm "add.w $0, $1, ${2:z}", "=r,r,ri"(i32 %a, i32 0) + ret i32 %1 +} + +define i32 @modifier_z_nonzero(i32 %a) nounwind { +; CHECK-LABEL: modifier_z_nonzero: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $a0, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, ${2:z}", "=r,r,ri"(i32 %a, i32 1) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-error.ll b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..56c335ffb3a60ddc7bdcf9666a6e1a3abf1e990f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-error.ll @@ -0,0 +1,14 @@ +; RUN: not llc --mtriple=loongarch32 2>&1 < %s | FileCheck %s +; RUN: not llc 
--mtriple=loongarch64 2>&1 < %s | FileCheck %s + +define i32 @non_exit_r32(i32 %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$r32}' + %1 = tail call i32 asm "addi.w $0, $1, 1", "=r,{$r32}"(i32 %a) + ret i32 %1 +} + +define i32 @non_exit_foo(i32 %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$foo}' + %1 = tail call i32 asm "addi.w $0, $1, 1", "=r,{$foo}"(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f-error.ll b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..82d0d21e1cd5e20c5f4a13cf8974399a26a1b566 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f-error.ll @@ -0,0 +1,14 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+f,+d 2>&1 < %s | FileCheck %s +; RUN: not llc --mtriple=loongarch64 --mattr=+f,+d 2>&1 < %s | FileCheck %s + +define double @non_exit_f32(double %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$f32}' + %1 = tail call double asm "fabs.d $0, $1", "=f,{$f32}"(double %a) + ret double %1 +} + +define double @non_exit_foo(double %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$foo}' + %1 = tail call double asm "fabs.d $0, $1", "=f,{$foo}"(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f.ll b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f.ll new file mode 100644 index 0000000000000000000000000000000000000000..8cf112223e25eb0b58999d3af6ba45e045378f1d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names-f.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,+d --target-abi=ilp32d --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --mattr=+f,+d --target-abi=lp64d --verify-machineinstrs < %s \ +; 
RUN: | FileCheck --check-prefix=LA64 %s + +;; These test that we can use architectural names ($f[0-9]*) refer to registers in +;; inline asm constraint lists. In each case, the named register should be used +;; for the source register of the `fabs.d`. It is very likely that `$fa0` will +;; be chosen as the designation register, but this is left to the compiler to +;; choose. +;; +;; Parenthesised registers in comments are the other aliases for this register. + +define double @register_f0(double %a) nounwind { +; LA32-LABEL: register_f0: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: fabs.d $fa0, $fa0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: register_f0: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: fabs.d $fa0, $fa0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = tail call double asm "fabs.d $0, $1", "=f,{$f0}"(double %a) + ret double %1 +} + +;; NOTE: This test uses `$f24` (`$fs0`) as an input, so it should be saved. +define double @register_f24(double %a) nounwind { +; LA32-LABEL: register_f24: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: fmov.d $fs0, $fa0 +; LA32-NEXT: #APP +; LA32-NEXT: fabs.d $fa0, $fs0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_f24: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fmov.d $fs0, $fa0 +; LA64-NEXT: #APP +; LA64-NEXT: fabs.d $fa0, $fs0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = tail call double asm "fabs.d $0, $1", "=f,{$f24}"(double %a) + ret double %1 +} + +;; NOTE: This test uses `$f31` (`$fs7`) as an input, so it should be saved. 
+define double @register_f31(double %a) nounwind { +; LA32-LABEL: register_f31: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: fmov.d $fs7, $fa0 +; LA32-NEXT: #APP +; LA32-NEXT: fabs.d $fa0, $fs7 +; LA32-NEXT: #NO_APP +; LA32-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_f31: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: fmov.d $fs7, $fa0 +; LA64-NEXT: #APP +; LA64-NEXT: fabs.d $fa0, $fs7 +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = tail call double asm "fabs.d $0, $1", "=f,{$f31}"(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-reg-names.ll b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names.ll new file mode 100644 index 0000000000000000000000000000000000000000..4bc16e6cc5fbbbdbd47a5e93ed97e9c3f2bcea75 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inline-asm-reg-names.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA64 %s + +;; These test that we can use architectural names ($r*) refer to registers in +;; inline asm constraint lists. In each case, the named register should be used +;; for the source register of the `addi.w`. It is very likely that `$a0` will +;; be chosen as the designation register, but this is left to the compiler to +;; choose. +;; +;; Parenthesised registers in comments are the other aliases for this register. + +;; NOTE: This test has to pass in 0 to the inline asm, because that's the only +;; value `$r0` (`$zero`) can take. 
+define i32 @register_r0() nounwind { +; LA32-LABEL: register_r0: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: addi.w $a0, $zero, 0 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: register_r0: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: addi.w $a0, $zero, 0 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, 0", "=r,{$r0}"(i32 0) + ret i32 %1 +} + +define i32 @register_r4(i32 %a) nounwind { +; LA32-LABEL: register_r4: +; LA32: # %bb.0: +; LA32-NEXT: #APP +; LA32-NEXT: addi.w $a0, $a0, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ret +; +; LA64-LABEL: register_r4: +; LA64: # %bb.0: +; LA64-NEXT: #APP +; LA64-NEXT: addi.w $a0, $a0, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, 1", "=r,{$r4}"(i32 %a) + ret i32 %1 +} + +;; NOTE: This test uses `$r22` (`$s9`, `$fp`) as an input, so it should be saved. +define i32 @register_r22(i32 %a) nounwind { +; LA32-LABEL: register_r22: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: #APP +; LA32-NEXT: addi.w $a0, $fp, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_r22: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $fp, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: #APP +; LA64-NEXT: addi.w $a0, $fp, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ld.d $fp, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, 1", "=r,{$r22}"(i32 %a) + ret i32 %1 +} + +;; NOTE: This test uses `$r31` (`$s8`) as an input, so it should be saved. 
+define i32 @register_r31(i32 %a) nounwind { +; LA32-LABEL: register_r31: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $s8, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $s8, $a0 +; LA32-NEXT: #APP +; LA32-NEXT: addi.w $a0, $s8, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: ld.w $s8, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_r31: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $s8, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $s8, $a0 +; LA64-NEXT: #APP +; LA64-NEXT: addi.w $a0, $s8, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: ld.d $s8, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = tail call i32 asm "addi.w $0, $1, 1", "=r,{$r31}"(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll new file mode 100644 index 0000000000000000000000000000000000000000..e3e23e46b04b981dc4cfe4b394a86e458709df0e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +declare i32 @llvm.loongarch.csrrd.w(i32 immarg) nounwind +declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) nounwind +declare void @bug() + +define dso_local void @foo(i32 noundef signext %flag) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz $a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: csrrd $a0, 2 +; CHECK-NEXT: ori $a0, $a0, 1 +; CHECK-NEXT: csrwr $a0, 2 +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: csrrd $a0, 2 +; CHECK-NEXT: andi $a0, $a0, 1 +; CHECK-NEXT: bnez $a0, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: b %plt(bug) +; CHECK-NEXT: .LBB0_4: # %if.end3 +; CHECK-NEXT: ret 
+entry: + %tobool.not = icmp eq i32 %flag, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) + %or = or i32 %0, 1 + %1 = tail call i32 @llvm.loongarch.csrwr.w(i32 %or, i32 2) + br label %if.end + +if.end: ; preds = %if.then, %entry + %2 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) + %and = and i32 %2, 1 + %tobool1.not = icmp eq i32 %and, 0 + br i1 %tobool1.not, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + tail call void @bug() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..a839ab149c3338e43078ddbe405405d3c7abad72 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll @@ -0,0 +1,154 @@ +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s +; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.dbar(i32) +declare void @llvm.loongarch.ibar(i32) +declare void @llvm.loongarch.break(i32) +declare void @llvm.loongarch.movgr2fcsr(i32, i32) +declare i32 @llvm.loongarch.movfcsr2gr(i32) +declare void @llvm.loongarch.syscall(i32) +declare i32 @llvm.loongarch.csrrd.w(i32 immarg) +declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) +declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) + +define void @dbar_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.dbar: argument out of range. +entry: + call void @llvm.loongarch.dbar(i32 32769) + ret void +} + +define void @dbar_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.dbar: argument out of range. +entry: + call void @llvm.loongarch.dbar(i32 -1) + ret void +} + +define void @ibar_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.ibar: argument out of range. 
+entry: + call void @llvm.loongarch.ibar(i32 32769) + ret void +} + +define void @ibar_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.ibar: argument out of range. +entry: + call void @llvm.loongarch.ibar(i32 -1) + ret void +} + +define void @break_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.break: argument out of range. +entry: + call void @llvm.loongarch.break(i32 32769) + ret void +} + +define void @break_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.break: argument out of range. +entry: + call void @llvm.loongarch.break(i32 -1) + ret void +} + +define void @movgr2fcsr(i32 %a) nounwind { +; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. +entry: + call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) + ret void +} + +define void @movgr2fcsr_imm_out_of_hi_range(i32 %a) #0 { +; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. +entry: + call void @llvm.loongarch.movgr2fcsr(i32 32, i32 %a) + ret void +} + +define void @movgr2fcsr_imm_out_of_lo_range(i32 %a) #0 { +; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. +entry: + call void @llvm.loongarch.movgr2fcsr(i32 -1, i32 %a) + ret void +} + +define i32 @movfcsr2gr() nounwind { +; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) + ret i32 %res +} + +define i32 @movfcsr2gr_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 32) + ret i32 %res +} + +define i32 @movfcsr2gr_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 -1) + ret i32 %res +} + +define void @syscall_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.syscall: argument out of range. 
+entry: + call void @llvm.loongarch.syscall(i32 32769) + ret void +} + +define void @syscall_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.syscall: argument out of range. +entry: + call void @llvm.loongarch.syscall(i32 -1) + ret void +} + +define i32 @csrrd_w_imm_out_of_hi_range() #0 { +; CHECK: llvm.loongarch.csrrd.w: argument out of range. +entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 16384) + ret i32 %0 +} + +define i32 @csrrd_w_imm_out_of_lo_range() #0 { +; CHECK: llvm.loongarch.csrrd.w: argument out of range. +entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 -1) + ret i32 %0 +} + +define i32 @csrwr_w_imm_out_of_hi_range(i32 %a) #0 { +; CHECK: llvm.loongarch.csrwr.w: argument out of range. +entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 16384) + ret i32 %0 +} + +define i32 @csrwr_w_imm_out_of_lo_range(i32 %a) #0 { +; CHECK: llvm.loongarch.csrwr.w: argument out of range. +entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 -1) + ret i32 %0 +} + +define i32 @csrxchg_w_imm_out_of_hi_range(i32 %a, i32 %b) #0 { +; CHECK: llvm.loongarch.csrxchg.w: argument out of range. +entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 16384) + ret i32 %0 +} + +define i32 @csrxchg_w_imm_out_of_lo_range(i32 %a, i32 %b) #0 { +; CHECK: llvm.loongarch.csrxchg.w: argument out of range. 
+entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 -1) + ret i32 %0 +} + +attributes #0 = { nounwind "target-features"="+f" } diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll new file mode 100644 index 0000000000000000000000000000000000000000..ad78f7f53be1240ea61b5f3b79e96415cb915fd9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +declare i32 @llvm.loongarch.iocsrrd.b(i32) nounwind +declare void @llvm.loongarch.iocsrwr.b(i32, i32) nounwind +declare i32 @llvm.loongarch.iocsrrd.h(i32) nounwind +declare void @llvm.loongarch.iocsrwr.h(i32, i32) nounwind +declare i32 @llvm.loongarch.iocsrrd.w(i32) nounwind +declare void @llvm.loongarch.iocsrwr.w(i32, i32) nounwind +declare i64 @llvm.loongarch.iocsrrd.d(i32) nounwind +declare void @llvm.loongarch.iocsrwr.d(i64, i32) nounwind +declare void @bug() + +define dso_local void @test_b(i32 noundef signext %flag) nounwind { +; CHECK-LABEL: test_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz $a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.b $a1, $a0 +; CHECK-NEXT: ori $a1, $a1, 1 +; CHECK-NEXT: iocsrwr.b $a1, $a0 +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.b $a0, $a0 +; CHECK-NEXT: andi $a0, $a0, 1 +; CHECK-NEXT: bnez $a0, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: b %plt(bug) +; CHECK-NEXT: .LBB0_4: # %if.end3 +; CHECK-NEXT: ret +entry: + %tobool.not = icmp eq i32 %flag, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) + %or = or i32 %0, 1 + tail call void @llvm.loongarch.iocsrwr.b(i32 %or, i32 2) + br label %if.end + +if.end: ; preds = 
%if.then, %entry + %1 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) + %and = and i32 %1, 1 + %tobool1.not = icmp eq i32 %and, 0 + br i1 %tobool1.not, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + tail call void @bug() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + +define dso_local void @test_h(i32 noundef signext %flag) nounwind { +; CHECK-LABEL: test_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz $a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.h $a1, $a0 +; CHECK-NEXT: ori $a1, $a1, 1 +; CHECK-NEXT: iocsrwr.h $a1, $a0 +; CHECK-NEXT: .LBB1_2: # %if.end +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.h $a0, $a0 +; CHECK-NEXT: andi $a0, $a0, 1 +; CHECK-NEXT: bnez $a0, .LBB1_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: b %plt(bug) +; CHECK-NEXT: .LBB1_4: # %if.end3 +; CHECK-NEXT: ret +entry: + %tobool.not = icmp eq i32 %flag, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) + %or = or i32 %0, 1 + tail call void @llvm.loongarch.iocsrwr.h(i32 %or, i32 2) + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) + %and = and i32 %1, 1 + %tobool1.not = icmp eq i32 %and, 0 + br i1 %tobool1.not, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + tail call void @bug() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + +define dso_local void @test_w(i32 noundef signext %flag) nounwind { +; CHECK-LABEL: test_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz $a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.w $a1, $a0 +; CHECK-NEXT: ori $a1, $a1, 1 +; CHECK-NEXT: iocsrwr.w $a1, $a0 +; CHECK-NEXT: .LBB2_2: # %if.end +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.w $a0, $a0 +; CHECK-NEXT: andi $a0, $a0, 1 +; 
CHECK-NEXT: bnez $a0, .LBB2_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: b %plt(bug) +; CHECK-NEXT: .LBB2_4: # %if.end3 +; CHECK-NEXT: ret +entry: + %tobool.not = icmp eq i32 %flag, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) + %or = or i32 %0, 1 + tail call void @llvm.loongarch.iocsrwr.w(i32 %or, i32 2) + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) + %and = and i32 %1, 1 + %tobool1.not = icmp eq i32 %and, 0 + br i1 %tobool1.not, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + tail call void @bug() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + +define dso_local void @test_d(i32 noundef signext %flag) nounwind { +; CHECK-LABEL: test_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz $a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.d $a1, $a0 +; CHECK-NEXT: ori $a1, $a1, 1 +; CHECK-NEXT: iocsrwr.d $a1, $a0 +; CHECK-NEXT: .LBB3_2: # %if.end +; CHECK-NEXT: ori $a0, $zero, 2 +; CHECK-NEXT: iocsrrd.d $a0, $a0 +; CHECK-NEXT: andi $a0, $a0, 1 +; CHECK-NEXT: bnez $a0, .LBB3_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: b %plt(bug) +; CHECK-NEXT: .LBB3_4: # %if.end3 +; CHECK-NEXT: ret +entry: + %tobool.not = icmp eq i32 %flag, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) + %or = or i64 %0, 1 + tail call void @llvm.loongarch.iocsrwr.d(i64 %or, i32 2) + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) + %and = and i64 %1, 1 + %tobool1.not = icmp eq i64 %and, 0 + br i1 %tobool1.not, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + tail call void @bug() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} diff --git 
a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..5302ba558940c8dd255c5112042e6627df28cb0d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll @@ -0,0 +1,167 @@ +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.cacop.w(i32, i32, i32) +declare i32 @llvm.loongarch.crc.w.b.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.h.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.w.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.d.w(i64, i32) +declare i32 @llvm.loongarch.crcc.w.b.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.h.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.w.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.d.w(i64, i32) +declare i64 @llvm.loongarch.csrrd.d(i32 immarg) +declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) +declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) +declare i64 @llvm.loongarch.iocsrrd.d(i32) +declare void @llvm.loongarch.iocsrwr.d(i64, i32) +declare void @llvm.loongarch.asrtle.d(i64, i64) +declare void @llvm.loongarch.asrtgt.d(i64, i64) +declare i64 @llvm.loongarch.lddir.d(i64, i64 immarg) +declare void @llvm.loongarch.ldpte.d(i64, i64 immarg) + +define void @cacop_arg0_out_of_hi_range(i32 %a) nounwind { +; CHECK: llvm.loongarch.cacop.w: argument out of range +entry: + call void @llvm.loongarch.cacop.w(i32 32, i32 %a, i32 1024) + ret void +} + +define void @cacop_arg0_out_of_lo_range(i32 %a) nounwind { +; CHECK: llvm.loongarch.cacop.w: argument out of range +entry: + call void @llvm.loongarch.cacop.w(i32 -1, i32 %a, i32 1024) + ret void +} + +define void @cacop_arg2_out_of_hi_range(i32 %a) nounwind { +; CHECK: llvm.loongarch.cacop.w: argument out of range +entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4096) + ret void +} + +define void @cacop_arg2_out_of_lo_range(i32 %a) nounwind { +; CHECK: llvm.loongarch.cacop.w: argument 
out of range +entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 -4096) + ret void +} + +define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crc.w.b.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crc.w.h.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crc.w.w.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crc.w.d.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) + ret i32 %res +} + +define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crcc.w.b.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crcc.w.h.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crcc.w.w.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) + ret i32 %res +} + +define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { +; CHECK: llvm.loongarch.crcc.w.d.w: requires loongarch64 +entry: + %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) + ret i32 %res +} + +define i64 @csrrd_d() { +; CHECK: llvm.loongarch.csrrd.d: requires loongarch64 +entry: + %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) + ret i64 %0 +} + +define i64 @csrwr_d(i64 %a) { +; CHECK: llvm.loongarch.csrwr.d: requires loongarch64 +entry: + %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) + ret i64 %0 +} 
+ +define i64 @csrxchg_d(i64 %a, i64 %b) { +; CHECK: llvm.loongarch.csrxchg.d: requires loongarch64 +entry: + %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) + ret i64 %0 +} + +define i64 @iocsrrd_d(i32 %a) { +; CHECK: llvm.loongarch.iocsrrd.d: requires loongarch64 +entry: + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) + ret i64 %0 +} + +define void @iocsrwr_d(i64 %a, i32 signext %b) { +; CHECK: llvm.loongarch.iocsrwr.d: requires loongarch64 +entry: + tail call void @llvm.loongarch.iocsrwr.d(i64 %a, i32 %b) + ret void +} + +define void @asrtle_d(i64 %a, i64 %b) { +; CHECK: llvm.loongarch.asrtle.d: requires loongarch64 +entry: + tail call void @llvm.loongarch.asrtle.d(i64 %a, i64 %b) + ret void +} + +define void @asrtgt_d(i64 %a, i64 %b) { +; CHECK: llvm.loongarch.asrtgt.d: requires loongarch64 +entry: + tail call void @llvm.loongarch.asrtgt.d(i64 %a, i64 %b) + ret void +} + +define i64 @lddir_d(i64 %a) { +; CHECK: llvm.loongarch.lddir.d: requires loongarch64 +entry: + %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) + ret i64 %0 +} + +define void @ldpte_d(i64 %a) { +; CHECK: llvm.loongarch.ldpte.d: requires loongarch64 +entry: + tail call void @llvm.loongarch.ldpte.d(i64 %a, i64 1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la32.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la32.ll new file mode 100644 index 0000000000000000000000000000000000000000..37e0902625a2cb7ef68e9b3863a1e5902cf5ef40 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-la32.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s + +declare void @llvm.loongarch.cacop.w(i32, i32, i32) + +define void @cacop_w(i32 %a) nounwind { +; CHECK-LABEL: cacop_w: +; CHECK: # %bb.0: +; CHECK-NEXT: cacop 1, $a0, 4 +; CHECK-NEXT: ret + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4) + ret void +} diff --git 
a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..4716d401d9fdf493641e5434c3f6b338d87c2635 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.cacop.w(i32, i32, i32) +declare void @llvm.loongarch.cacop.d(i64, i64, i64) +declare i64 @llvm.loongarch.csrrd.d(i32 immarg) +declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) +declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) + +define i64 @csrrd_d_imm_out_of_hi_range() nounwind { +; CHECK: llvm.loongarch.csrrd.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 16384) + ret i64 %0 +} + +define i64 @csrrd_d_imm_out_of_lo_range() nounwind { +; CHECK: llvm.loongarch.csrrd.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 -1) + ret i64 %0 +} + +define i64 @csrwr_d_imm_out_of_hi_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.csrwr.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 16384) + ret i64 %0 +} + +define i64 @csrwr_d_imm_out_of_lo_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.csrwr.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 -1) + ret i64 %0 +} + +define i64 @csrxchg_d_imm_out_of_hi_range(i64 %a, i64 %b) nounwind { +; CHECK: llvm.loongarch.csrxchg.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 16384) + ret i64 %0 +} + +define i64 @csrxchg_d_imm_out_of_lo_range(i64 %a, i64 %b) nounwind { +; CHECK: llvm.loongarch.csrxchg.d: argument out of range +entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 -1) + ret i64 %0 +} + +define void @cacop_w(i32 %a) nounwind { +; CHECK: 
llvm.loongarch.cacop.w: requires loongarch32 + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4) + ret void +} + +define void @cacop_arg0_out_of_hi_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.cacop.d: argument out of range +entry: + call void @llvm.loongarch.cacop.d(i64 32, i64 %a, i64 1024) + ret void +} + +define void @cacop_arg0_out_of_lo_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.cacop.d: argument out of range +entry: + call void @llvm.loongarch.cacop.d(i64 -1, i64 %a, i64 1024) + ret void +} + +define void @cacop_arg2_out_of_hi_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.cacop.d: argument out of range +entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 4096) + ret void +} + +define void @cacop_arg2_out_of_lo_range(i64 %a) nounwind { +; CHECK: llvm.loongarch.cacop.d: argument out of range +entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 -4096) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll new file mode 100644 index 0000000000000000000000000000000000000000..f0ebd8508ad14840e5f539ce436290278c7e0521 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +declare void @llvm.loongarch.cacop.d(i64, i64, i64) +declare i32 @llvm.loongarch.crc.w.b.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.h.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.w.w(i32, i32) +declare i32 @llvm.loongarch.crc.w.d.w(i64, i32) +declare i32 @llvm.loongarch.crcc.w.b.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.h.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.w.w(i32, i32) +declare i32 @llvm.loongarch.crcc.w.d.w(i64, i32) +declare i64 @llvm.loongarch.csrrd.d(i32 immarg) +declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) +declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) +declare i64 
@llvm.loongarch.iocsrrd.d(i32) +declare void @llvm.loongarch.iocsrwr.d(i64, i32) +declare void @llvm.loongarch.asrtle.d(i64, i64) +declare void @llvm.loongarch.asrtgt.d(i64, i64) +declare i64 @llvm.loongarch.lddir.d(i64, i64) +declare void @llvm.loongarch.ldpte.d(i64, i64) + +define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_b_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crc.w.b.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crc_w_b_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_b_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) + ret void +} + +define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_h_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crc.w.h.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crc_w_h_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_h_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) + ret void +} + +define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_w_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crc.w.w.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crc_w_w_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_w_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) + ret void +} + +define void @cacop_d(i64 %a) nounwind { +; CHECK-LABEL: cacop_d: +; CHECK: # %bb.0: +; CHECK-NEXT: cacop 1, $a0, 4 +; CHECK-NEXT: ret + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 4) + ret void +} + +define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_d_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crc.w.d.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 
@llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) + ret i32 %res +} + +define void @crc_w_d_w_noret(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: crc_w_d_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) + ret void +} + +define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_b_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crcc.w.b.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crcc_w_b_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_b_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) + ret void +} + +define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_h_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crcc.w.h.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crcc_w_h_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_h_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) + ret void +} + +define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_w_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crcc.w.w.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) + ret i32 %res +} + +define void @crcc_w_w_w_noret(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_w_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) + ret void +} + +define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_d_w: +; CHECK: # %bb.0: +; CHECK-NEXT: crcc.w.d.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) + ret i32 %res +} + +define void @crcc_w_d_w_noret(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: crcc_w_d_w_noret: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call i32 
@llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) + ret void +} + +define i64 @csrrd_d() { +; CHECK-LABEL: csrrd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrrd $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) + ret i64 %0 +} + +define void @csrrd_d_noret() { +; CHECK-LABEL: csrrd_d_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrrd $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) + ret void +} + +define i64 @csrwr_d(i64 %a) { +; CHECK-LABEL: csrwr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrwr $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) + ret i64 %0 +} + +;; Check that csrwr is emitted even if the return value of the intrinsic is not used. +define void @csrwr_d_noret(i64 %a) { +; CHECK-LABEL: csrwr_d_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrwr $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) + ret void +} + +define i64 @csrxchg_d(i64 %a, i64 %b) { +; CHECK-LABEL: csrxchg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrxchg $a0, $a1, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) + ret i64 %0 +} + +;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
+define void @csrxchg_d_noret(i64 %a, i64 %b) { +; CHECK-LABEL: csrxchg_d_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrxchg $a0, $a1, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) + ret void +} + +define i64 @iocsrrd_d(i32 %a) { +; CHECK-LABEL: iocsrrd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.d $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) + ret i64 %0 +} + +define void @iocsrrd_d_noret(i32 %a) { +; CHECK-LABEL: iocsrrd_d_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.d $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) + ret void +} + +define void @iocsrwr_d(i64 %a, i32 signext %b) { +; CHECK-LABEL: iocsrwr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrwr.d $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.iocsrwr.d(i64 %a, i32 %b) + ret void +} + +define void @asrtle_d(i64 %a, i64 %b) { +; CHECK-LABEL: asrtle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: asrtle.d $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.asrtle.d(i64 %a, i64 %b) + ret void +} + +define void @asrtgt_d(i64 %a, i64 %b) { +; CHECK-LABEL: asrtgt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: asrtgt.d $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.asrtgt.d(i64 %a, i64 %b) + ret void +} + +define i64 @lddir_d(i64 %a) { +; CHECK-LABEL: lddir_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lddir $a0, $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) + ret i64 %0 +} + +define void @lddir_d_noret(i64 %a) { +; CHECK-LABEL: lddir_d_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lddir $a0, $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) + ret void +} + +define void @ldpte_d(i64 %a) { +; CHECK-LABEL: ldpte_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ldpte $a0, 1 +; CHECK-NEXT: ret 
+entry: + tail call void @llvm.loongarch.ldpte.d(i64 %a, i64 1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-not-constant-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-not-constant-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..9cb89670c293d031471e1e83a7aa43d6bfb0ce22 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-not-constant-error.ll @@ -0,0 +1,51 @@ +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s +; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.dbar(i32) +declare void @llvm.loongarch.ibar(i32) +declare void @llvm.loongarch.break(i32) +declare void @llvm.loongarch.movgr2fcsr(i32, i32) +declare i32 @llvm.loongarch.movfcsr2gr(i32) +declare void @llvm.loongarch.syscall(i32) + +define void @dbar_not_constant(i32 %x) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.dbar(i32 %x) + ret void +} + +define void @ibar(i32 %x) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.ibar(i32 %x) + ret void +} + +define void @break(i32 %x) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.break(i32 %x) + ret void +} + +define void @movgr2fcsr(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.movgr2fcsr(i32 %a, i32 %a) + ret void +} + +define i32 @movfcsr2gr(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 %a) + ret i32 %res +} + +define void @syscall(i32 %x) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.syscall(i32 %x) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic.ll b/llvm/test/CodeGen/LoongArch/intrinsic.ll new file mode 100644 index 
0000000000000000000000000000000000000000..f49a2500ad3c73137a4e4a1555479cab2834a4cd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic.ll @@ -0,0 +1,262 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f --verify-machineinstrs < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+f --verify-machineinstrs < %s | FileCheck %s + +declare void @llvm.loongarch.dbar(i32) +declare void @llvm.loongarch.ibar(i32) +declare void @llvm.loongarch.break(i32) +declare void @llvm.loongarch.movgr2fcsr(i32, i32) +declare i32 @llvm.loongarch.movfcsr2gr(i32) +declare void @llvm.loongarch.syscall(i32) +declare i32 @llvm.loongarch.csrrd.w(i32 immarg) +declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) +declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) +declare i32 @llvm.loongarch.iocsrrd.b(i32) +declare i32 @llvm.loongarch.iocsrrd.h(i32) +declare i32 @llvm.loongarch.iocsrrd.w(i32) +declare void @llvm.loongarch.iocsrwr.b(i32, i32) +declare void @llvm.loongarch.iocsrwr.h(i32, i32) +declare void @llvm.loongarch.iocsrwr.w(i32, i32) +declare i32 @llvm.loongarch.cpucfg(i32) + +define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.dbar(i32 0) + ret void +} + +define void @ibar() nounwind { +; CHECK-LABEL: ibar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ibar 0 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.ibar(i32 0) + ret void +} + +define void @break() nounwind { +; CHECK-LABEL: break: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: break 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.break(i32 1) + ret void +} + +define void @movgr2fcsr(i32 %a) nounwind { +; CHECK-LABEL: movgr2fcsr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movgr2fcsr $fcsr1, $a0 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) + ret void +} + +define i32 @movfcsr2gr() 
nounwind { +; CHECK-LABEL: movfcsr2gr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfcsr2gr $a0, $fcsr1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) + ret i32 %res +} + +;; TODO: Optimize out `movfcsr2gr` without data-dependency. +define void @movfcsr2gr_noret() nounwind { +; CHECK-LABEL: movfcsr2gr_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfcsr2gr $a0, $fcsr1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) + ret void +} + +define void @syscall() nounwind { +; CHECK-LABEL: syscall: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: syscall 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.syscall(i32 1) + ret void +} + +define i32 @csrrd_w() { +; CHECK-LABEL: csrrd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrrd $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1) + ret i32 %0 +} + +define void @csrrd_w_noret() { +; CHECK-LABEL: csrrd_w_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrrd $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1) + ret void +} + +define i32 @csrwr_w(i32 signext %a) { +; CHECK-LABEL: csrwr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrwr $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 1) + ret i32 %0 +} + +;; Check that csrwr is emitted even if the return value of the intrinsic is not used. 
+define void @csrwr_w_noret(i32 signext %a) { +; CHECK-LABEL: csrwr_w_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrwr $a0, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 1) + ret void +} + +define i32 @csrxchg_w(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: csrxchg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrxchg $a0, $a1, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 1) + ret i32 %0 +} + +;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. +define void @csrxchg_w_noret(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: csrxchg_w_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrxchg $a0, $a1, 1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 1) + ret void +} + +define i32 @iocsrrd_b(i32 %a) { +; CHECK-LABEL: iocsrrd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.b $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a) + ret i32 %0 +} + +define i32 @iocsrrd_h(i32 %a) { +; CHECK-LABEL: iocsrrd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.h $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a) + ret i32 %0 +} + +define i32 @iocsrrd_w(i32 %a) { +; CHECK-LABEL: iocsrrd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.w $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a) + ret i32 %0 +} + +define void @iocsrrd_b_noret(i32 %a) { +; CHECK-LABEL: iocsrrd_b_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.b $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a) + ret void +} + +define void @iocsrrd_h_noret(i32 %a) { +; CHECK-LABEL: iocsrrd_h_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.h $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a) + ret void +} + +define 
void @iocsrrd_w_noret(i32 %a) { +; CHECK-LABEL: iocsrrd_w_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrrd.w $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a) + ret void +} + +define void @iocsrwr_b(i32 %a, i32 %b) { +; CHECK-LABEL: iocsrwr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrwr.b $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.iocsrwr.b(i32 %a, i32 %b) + ret void +} + +define void @iocsrwr_h(i32 %a, i32 %b) { +; CHECK-LABEL: iocsrwr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrwr.h $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.iocsrwr.h(i32 %a, i32 %b) + ret void +} + +define void @iocsrwr_w(i32 %a, i32 %b) { +; CHECK-LABEL: iocsrwr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iocsrwr.w $a0, $a1 +; CHECK-NEXT: ret +entry: + tail call void @llvm.loongarch.iocsrwr.w(i32 %a, i32 %b) + ret void +} + +define i32 @cpucfg(i32 %a) { +; CHECK-LABEL: cpucfg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cpucfg $a0, $a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) + ret i32 %0 +} + +define void @cpucfg_noret(i32 %a) { +; CHECK-LABEL: cpucfg_noret: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll index bfa1a59756b814b73fbb9c7b1c3a511281d7e665..d96adb250eb1d7b0f258fa4836ad5b899cbf88f3 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i1 @add_i1(i1 %x, i1 %y) { ; LA32-LABEL: add_i1: ; LA32: # %bb.0: ; LA32-NEXT: add.w 
$a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i1: ; LA64: # %bb.0: ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i1 %x, %y ret i1 %add } @@ -21,12 +22,12 @@ define i8 @add_i8(i8 %x, i8 %y) { ; LA32-LABEL: add_i8: ; LA32: # %bb.0: ; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i8: ; LA64: # %bb.0: ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i8 %x, %y ret i8 %add } @@ -35,12 +36,12 @@ define i16 @add_i16(i16 %x, i16 %y) { ; LA32-LABEL: add_i16: ; LA32: # %bb.0: ; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i16: ; LA64: # %bb.0: ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i16 %x, %y ret i16 %add } @@ -49,12 +50,12 @@ define i32 @add_i32(i32 %x, i32 %y) { ; LA32-LABEL: add_i32: ; LA32: # %bb.0: ; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i32: ; LA64: # %bb.0: ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i32 %x, %y ret i32 %add } @@ -65,12 +66,12 @@ define signext i32 @add_i32_sext(i32 %x, i32 %y) { ; LA32-LABEL: add_i32_sext: ; LA32: # %bb.0: ; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i32_sext: ; LA64: # %bb.0: ; LA64-NEXT: add.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i32 %x, %y ret i32 %add } @@ -83,12 +84,12 @@ define i64 @add_i64(i64 %x, i64 %y) { ; LA32-NEXT: sltu $a0, $a2, $a0 ; LA32-NEXT: add.w $a1, $a1, $a0 ; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i64: ; LA64: # %bb.0: ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i64 %x, %y ret i64 %add } @@ -97,12 
+98,12 @@ define i1 @add_i1_3(i1 %x) { ; LA32-LABEL: add_i1_3: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i1_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i1 %x, 3 ret i1 %add } @@ -111,12 +112,12 @@ define i8 @add_i8_3(i8 %x) { ; LA32-LABEL: add_i8_3: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i8_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i8 %x, 3 ret i8 %add } @@ -125,12 +126,12 @@ define i16 @add_i16_3(i16 %x) { ; LA32-LABEL: add_i16_3: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i16_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i16 %x, 3 ret i16 %add } @@ -139,12 +140,12 @@ define i32 @add_i32_3(i32 %x) { ; LA32-LABEL: add_i32_3: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i32_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i32 %x, 3 ret i32 %add } @@ -155,12 +156,12 @@ define signext i32 @add_i32_3_sext(i32 %x) { ; LA32-LABEL: add_i32_3_sext: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i32_3_sext: ; LA64: # %bb.0: ; LA64-NEXT: addi.w $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = add i32 %x, 3 ret i32 %add } @@ -172,12 +173,12 @@ define i64 @add_i64_3(i64 %x) { ; LA32-NEXT: sltu $a0, $a2, $a0 ; LA32-NEXT: add.w $a1, $a1, $a0 ; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: add_i64_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl 
$zero, $ra, 0 +; LA64-NEXT: ret %add = add i64 %x, 3 ret i64 %add } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll index e5c9da58c7570d91540ef28ecf58880d53950bd0..47b990febe8ac2c702d4808ba8650229fc2ee711 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i1 @and_i1(i1 %a, i1 %b) { ; LA32-LABEL: and_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i1 %a, %b ret i1 %r @@ -22,12 +23,12 @@ define i8 @and_i8(i8 %a, i8 %b) { ; LA32-LABEL: and_i8: ; LA32: # %bb.0: # %entry ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i8 %a, %b ret i8 %r @@ -37,12 +38,12 @@ define i16 @and_i16(i16 %a, i16 %b) { ; LA32-LABEL: and_i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i16 %a, %b ret i16 %r @@ -52,12 +53,12 @@ define i32 @and_i32(i32 %a, i32 %b) { ; LA32-LABEL: and_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 
+; LA64-NEXT: ret entry: %r = and i32 %a, %b ret i32 %r @@ -68,12 +69,12 @@ define i64 @and_i64(i64 %a, i64 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: and $a0, $a0, $a2 ; LA32-NEXT: and $a1, $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i64 %a, %b ret i64 %r @@ -83,12 +84,12 @@ define i1 @and_i1_0(i1 %b) { ; LA32-LABEL: and_i1_0: ; LA32: # %bb.0: # %entry ; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i1_0: ; LA64: # %bb.0: # %entry ; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i1 4, %b ret i1 %r @@ -97,11 +98,11 @@ entry: define i1 @and_i1_5(i1 %b) { ; LA32-LABEL: and_i1_5: ; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i1_5: ; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i1 5, %b ret i1 %r @@ -111,12 +112,12 @@ define i8 @and_i8_5(i8 %b) { ; LA32-LABEL: and_i8_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i8_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i8 5, %b ret i8 %r @@ -126,12 +127,12 @@ define i8 @and_i8_257(i8 %b) { ; LA32-LABEL: and_i8_257: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i8_257: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i8 257, %b ret i8 %r @@ -141,12 +142,12 @@ define i16 @and_i16_5(i16 %b) { ; LA32-LABEL: and_i16_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i16_5: ; LA64: # 
%bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i16 5, %b ret i16 %r @@ -157,13 +158,13 @@ define i16 @and_i16_0x1000(i16 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i16_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i16 4096, %b ret i16 %r @@ -173,12 +174,12 @@ define i16 @and_i16_0x10001(i16 %b) { ; LA32-LABEL: and_i16_0x10001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i16_0x10001: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i16 65537, %b ret i16 %r @@ -188,12 +189,12 @@ define i32 @and_i32_5(i32 %b) { ; LA32-LABEL: and_i32_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i32_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i32 5, %b ret i32 %r @@ -204,13 +205,13 @@ define i32 @and_i32_0x1000(i32 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i32_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i32 4096, %b ret i32 %r @@ -220,12 +221,12 @@ define i32 @and_i32_0x100000001(i32 %b) { ; LA32-LABEL: and_i32_0x100000001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i32_0x100000001: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl 
$zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i32 4294967297, %b ret i32 %r @@ -236,12 +237,12 @@ define i64 @and_i64_5(i64 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: andi $a0, $a0, 5 ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i64_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i64 5, %b ret i64 %r @@ -253,13 +254,13 @@ define i64 @and_i64_0x1000(i64 %b) { ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: and_i64_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = and i64 4096, %b ret i64 %r diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll index 1b7e8085185a0bd03fcdcdaf81d7aba358905491..0d8e7127d0df8b28e0fe0ce0bc7e620b241bb95b 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -6,11 +7,11 @@ define i1 @ashr_i1(i1 %x, i1 %y) { ; LA32-LABEL: ashr_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i1 %x, %y ret i1 %ashr } @@ -20,13 +21,13 @@ define i8 @ashr_i8(i8 %x, i8 %y) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.b $a0, $a0 ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i8: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.b $a0, $a0 ; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl 
$zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i8 %x, %y ret i8 %ashr } @@ -36,13 +37,13 @@ define i16 @ashr_i16(i16 %x, i16 %y) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.h $a0, $a0 ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i16: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.h $a0, $a0 ; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i16 %x, %y ret i16 %ashr } @@ -51,12 +52,12 @@ define i32 @ashr_i32(i32 %x, i32 %y) { ; LA32-LABEL: ashr_i32: ; LA32: # %bb.0: ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i32: ; LA64: # %bb.0: ; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i32 %x, %y ret i32 %ashr } @@ -81,12 +82,12 @@ define i64 @ashr_i64(i64 %x, i64 %y) { ; LA32-NEXT: masknez $a1, $a1, $a5 ; LA32-NEXT: or $a0, $a0, $a1 ; LA32-NEXT: move $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i64: ; LA64: # %bb.0: ; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i64 %x, %y ret i64 %ashr } @@ -94,11 +95,11 @@ define i64 @ashr_i64(i64 %x, i64 %y) { define i1 @ashr_i1_3(i1 %x) { ; LA32-LABEL: ashr_i1_3: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i1_3: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i1 %x, 3 ret i1 %ashr } @@ -108,13 +109,13 @@ define i8 @ashr_i8_3(i8 %x) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.b $a0, $a0 ; LA32-NEXT: srai.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i8_3: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.b $a0, $a0 ; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i8 %x, 3 ret i8 %ashr } @@ -124,13 +125,13 @@ define i16 @ashr_i16_3(i16 %x) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.h $a0, $a0 ; LA32-NEXT: srai.w $a0, $a0, 
3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i16_3: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.h $a0, $a0 ; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i16 %x, 3 ret i16 %ashr } @@ -139,13 +140,13 @@ define i32 @ashr_i32_3(i32 %x) { ; LA32-LABEL: ashr_i32_3: ; LA32: # %bb.0: ; LA32-NEXT: srai.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i32_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i32 %x, 3 ret i32 %ashr } @@ -157,12 +158,12 @@ define i64 @ashr_i64_3(i64 %x) { ; LA32-NEXT: slli.w $a2, $a1, 29 ; LA32-NEXT: or $a0, $a0, $a2 ; LA32-NEXT: srai.w $a1, $a1, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ashr_i64_3: ; LA64: # %bb.0: ; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %ashr = ashr i64 %x, 3 ret i64 %ashr } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll new file mode 100644 index 0000000000000000000000000000000000000000..b8c0cb257122a792c43705478a095ec7c406b653 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -0,0 +1,356 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w 
$a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB0_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB0_1 +; LA64-NEXT: b .LBB0_4 +; LA64-NEXT: .LBB0_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB0_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire + ret void +} + +define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a0, $a4, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a3, 0 +; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: bne $a5, $a1, .LBB1_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: or $a5, $a5, $a2 +; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: beqz $a5, .LBB1_1 +; LA64-NEXT: b .LBB1_4 +; LA64-NEXT: .LBB1_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB1_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire + ret void +} + +define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, 
.LBB2_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB2_1 +; LA64-NEXT: b .LBB2_4 +; LA64-NEXT: .LBB2_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB2_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + ret void +} + +define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB3_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB3_1 +; LA64-NEXT: b .LBB3_4 +; LA64-NEXT: .LBB3_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB3_4: +; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire + ret void +} + +define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a4 +; LA64-NEXT: bne $a6, $a1, .LBB4_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB4_1 +; LA64-NEXT: b .LBB4_4 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: 
.LBB4_4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire + %res = extractvalue { i8, i1 } %tmp, 0 + ret i8 %res +} + +define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a2, $a2, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: and $a6, $a5, $a4 +; LA64-NEXT: bne $a6, $a1, .LBB5_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: beqz $a6, .LBB5_1 +; LA64-NEXT: b .LBB5_4 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB5_4: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire + %res = extractvalue { i16, i1 } %tmp, 0 + ret i16 %res +} + +define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: +; LA64: # %bb.0: +; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB6_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB6_1 +; LA64-NEXT: b .LBB6_4 +; LA64-NEXT: .LBB6_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB6_4: +; LA64-NEXT: move $a0, $a3 +; 
LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 0 + ret i32 %res +} + +define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: +; LA64: # %bb.0: +; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB7_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB7_1 +; LA64-NEXT: b .LBB7_4 +; LA64-NEXT: .LBB7_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB7_4: +; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire + %res = extractvalue { i64, i1 } %tmp, 0 + ret i64 %res +} + +define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a2, $a2, 255 +; LA64-NEXT: sll.w $a0, $a2, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a6, $a3, 0 +; LA64-NEXT: and $a7, $a6, $a2 +; LA64-NEXT: bne $a7, $a5, .LBB8_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a7, $a6, $a2 +; LA64-NEXT: or $a7, $a7, $a0 +; LA64-NEXT: sc.w $a7, $a3, 0 +; LA64-NEXT: beqz $a7, .LBB8_1 +; LA64-NEXT: b .LBB8_4 +; LA64-NEXT: .LBB8_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB8_4: +; LA64-NEXT: and $a0, $a6, $a4 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 +; LA64-NEXT: xor $a0, 
$a1, $a0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire + %res = extractvalue { i8, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a3, $a0, $a3 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 +; LA64-NEXT: sll.w $a0, $a2, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a6, $a3, 0 +; LA64-NEXT: and $a7, $a6, $a2 +; LA64-NEXT: bne $a7, $a5, .LBB9_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: andn $a7, $a6, $a2 +; LA64-NEXT: or $a7, $a7, $a0 +; LA64-NEXT: sc.w $a7, $a3, 0 +; LA64-NEXT: beqz $a7, .LBB9_1 +; LA64-NEXT: b .LBB9_4 +; LA64-NEXT: .LBB9_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB9_4: +; LA64-NEXT: and $a0, $a6, $a4 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 +; LA64-NEXT: xor $a0, $a1, $a0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire + %res = extractvalue { i16, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB10_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.w $a4, $a0, 0 +; LA64-NEXT: beqz $a4, 
.LBB10_1 +; LA64-NEXT: b .LBB10_4 +; LA64-NEXT: .LBB10_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB10_4: +; LA64-NEXT: addi.w $a0, $a1, 0 +; LA64-NEXT: xor $a0, $a3, $a0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a3, $a0, 0 +; LA64-NEXT: bne $a3, $a1, .LBB11_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: sc.d $a4, $a0, 0 +; LA64-NEXT: beqz $a4, .LBB11_1 +; LA64-NEXT: b .LBB11_4 +; LA64-NEXT: .LBB11_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB11_4: +; LA64-NEXT: xor $a0, $a3, $a1 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll new file mode 100644 index 0000000000000000000000000000000000000000..3d7aa871b9c9ff25eec8b4fe2cfd0168d505fe25 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -0,0 +1,663 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +;; Add more test cases after supporting different AtomicOrdering. 
+ +define float @float_fadd_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fadd_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: .LBB0_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB0_3 Depth 2 +; LA64F-NEXT: ffint.s.w $fa2, $fa1 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa2 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB0_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB0_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB0_3 +; LA64F-NEXT: b .LBB0_6 +; LA64F-NEXT: .LBB0_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB0_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB0_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fadd_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: .LBB0_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB0_3 Depth 2 +; LA64D-NEXT: ffint.s.w $fa2, $fa1 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa2 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB0_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB0_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; 
LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB0_3 +; LA64D-NEXT: b .LBB0_6 +; LA64D-NEXT: .LBB0_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB0_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB0_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define float @float_fsub_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .LBB1_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB1_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB1_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB1_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB1_3 +; LA64F-NEXT: b .LBB1_6 +; LA64F-NEXT: .LBB1_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB1_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB1_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_acquire: 
+; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .LBB1_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB1_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB1_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB1_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB1_3 +; LA64D-NEXT: b .LBB1_6 +; LA64D-NEXT: .LBB1_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB1_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB1_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define float @float_fmin_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: .LBB2_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB2_3 Depth 2 +; LA64F-NEXT: ffint.s.w $fa2, $fa1 +; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa3, $fa2 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB2_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: 
ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB2_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB2_3 +; LA64F-NEXT: b .LBB2_6 +; LA64F-NEXT: .LBB2_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB2_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB2_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: .LBB2_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB2_3 Depth 2 +; LA64D-NEXT: ffint.s.w $fa2, $fa1 +; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa3, $fa2 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB2_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB2_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB2_3 +; LA64D-NEXT: b .LBB2_6 +; LA64D-NEXT: .LBB2_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB2_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB2_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acquire, 
align 4 + ret float %v +} + +define float @float_fmax_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fmax_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: .LBB3_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB3_3 Depth 2 +; LA64F-NEXT: ffint.s.w $fa2, $fa1 +; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB3_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB3_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB3_3 +; LA64F-NEXT: b .LBB3_6 +; LA64F-NEXT: .LBB3_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB3_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB3_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: .LBB3_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB3_3 Depth 2 +; LA64D-NEXT: ffint.s.w $fa2, $fa1 +; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB3_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; 
LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB3_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB3_3 +; LA64D-NEXT: b .LBB3_6 +; LA64D-NEXT: .LBB3_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB3_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB3_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define double @double_fadd_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB4_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB4_1 +; LA64F-NEXT: # %bb.2: # 
%atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB4_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB4_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte 
Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fsub_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB5_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, -1025 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB5_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, 
$sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB5_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB5_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fmin_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; 
LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB6_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB6_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmin_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; 
LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB6_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB6_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fmax_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori 
$s3, $zero, 2 +; LA64F-NEXT: .LBB7_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB7_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB7_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: 
fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB7_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 + ret double %v +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd4a9e7fa9c4fff87f3e171ac6bede25998a3100 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll @@ -0,0 +1,379 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefix=LA64 + +;; TODO: Testing for LA32 architecture will be added later + +define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: 
ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB0_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB0_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a6, $a1, .LBB1_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB1_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 
@atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umax_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: bgeu $a1, $a6, .LBB4_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB4_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: move $a5, $a4 +; 
LA64-NEXT: bgeu $a1, $a6, .LBB5_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: xor $a5, $a4, $a1 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB5_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_umin_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.du $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a7, $a1, .LBB8_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB8_3: # in Loop: 
Header=BB8_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB8_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a7, $a1, .LBB9_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB9_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_max_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammax_db.d $a2, $a1, $a0 +; LA64-NEXT: 
move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: ext.w.b $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: xori $a4, $a4, 56 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and $a7, $a5, $a3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a4 +; LA64-NEXT: sra.w $a7, $a7, $a4 +; LA64-NEXT: bge $a1, $a7, .LBB12_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a3 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB12_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: ori $a4, $zero, 48 +; LA64-NEXT: sub.d $a3, $a4, $a3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: ext.w.h $a1, $a1 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: and 
$a7, $a5, $a4 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sll.w $a7, $a7, $a3 +; LA64-NEXT: sra.w $a7, $a7, $a3 +; LA64-NEXT: bge $a1, $a7, .LBB13_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; LA64-NEXT: xor $a6, $a5, $a1 +; LA64-NEXT: and $a6, $a6, $a4 +; LA64-NEXT: xor $a6, $a5, $a6 +; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: beqz $a6, .LBB13_1 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { +; LA64-LABEL: atomicrmw_min_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: ammin_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i64 %b acquire + ret i64 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll new file mode 100644 index 0000000000000000000000000000000000000000..c077d14f728f7eeab81119b7dbcbc51f6208b5bb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll @@ -0,0 +1,1830 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, 
$a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB0_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB0_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB1_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w 
$a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB1_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB2_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; 
LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB4_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB4_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, 
$a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB5_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB5_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB6_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_acquire: +; LA32: # 
%bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB8_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB8_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, 
i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB9_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB9_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB10_1 +; 
LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB12_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; 
LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB12_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB13_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 
+; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB13_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB14_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB14_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB15_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 
@atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB16_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB17_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: 
slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB18_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a2, 
0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB20_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB21_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB22_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: 
amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB24_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; 
LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB25_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_acquire: +; LA32: # %bb.0: +; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB26_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_acquire: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a3, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: 
atomicrmw_xchg_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB28_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB28_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; 
LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB29_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB29_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: move $a3, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB30_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_exchange_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w 
$sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xchg ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB32_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB32_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; 
LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB33_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB33_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: add.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB34_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 
@atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_add_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw add ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB36_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB36_1 +; LA64-NEXT: # %bb.2: 
+; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB37_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: sub.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB37_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: sub.w $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; 
LA32-NEXT: beqz $a3, .LBB38_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.w $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: amadd_db.d $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = atomicrmw sub ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB40_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; 
LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB40_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: and $a5, $a4, $a1 +; LA32-NEXT: nor $a5, $a5, $zero +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: beqz $a5, .LBB41_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: and $a5, $a4, $a1 +; LA64-NEXT: nor $a5, $a5, $zero +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, 
$a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: beqz $a5, .LBB41_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: nor $a3, $a3, $zero +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB42_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.w $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB42_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_nand_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_nand_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: and $a3, $a2, $a1 +; LA64-NEXT: nor $a3, $a3, $zero +; LA64-NEXT: sc.d $a3, $a0, 0 +; LA64-NEXT: beqz $a3, .LBB43_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw nand ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: 
atomicrmw_and_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: addi.w $a3, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB44_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: orn $a1, $a1, $a3 +; LA64-NEXT: addi.w $a3, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: amand_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: orn $a1, $a1, $a2 +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a4, $a2, $a1 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB45_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a2, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; 
LA64-NEXT: sll.w $a1, $a1, $a3 +; LA64-NEXT: orn $a1, $a1, $a2 +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a2, $a3 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: and $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB46_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_and_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_and_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_and_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amand_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw and ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB48_1 +; LA32-NEXT: # 
%bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: or $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB49_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: or $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB50_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i32 %b monotonic + ret i32 %1 +} + 
+define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_or_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_or_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_or_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw or ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB52_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a2, $zero, -4 +; LA32-NEXT: and $a2, $a0, $a2 +; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: xor $a4, $a3, $a1 +; LA32-NEXT: sc.w 
$a4, $a2, 0 +; LA32-NEXT: beqz $a4, .LBB53_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a2 +; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i32_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a2, $a0, 0 +; LA32-NEXT: xor $a3, $a2, $a1 +; LA32-NEXT: sc.w $a3, $a0, 0 +; LA32-NEXT: beqz $a3, .LBB54_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i32_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.w $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { +; LA32-LABEL: atomicrmw_xor_i64_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xor_i64_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amxor_db.d $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %1 = atomicrmw xor ptr %a, i64 %b monotonic + ret i64 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll index f46eca268aae8f805dc4aa65628a3e1c9cc3c761..7ab4788e6a31a163f55cd9a858d062a5d32d19f0 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll +++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=ALL,LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=ALL,LA64 @@ -18,22 +19,20 @@ define void @foo_br_eq(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: beq $a2, $a0, .LBB1_2 -; LA32-NEXT: b .LBB1_1 -; LA32-NEXT: .LBB1_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB1_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_eq: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: beq $a2, $a0, .LBB1_2 -; LA64-NEXT: b .LBB1_1 -; LA64-NEXT: .LBB1_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB1_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp eq i32 %val, %a br i1 %cc, label %end, label %test @@ -50,22 +49,20 @@ define void @foo_br_ne(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bne $a2, $a0, .LBB2_2 -; LA32-NEXT: b .LBB2_1 -; LA32-NEXT: .LBB2_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB2_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_ne: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: bne $a2, $a0, .LBB2_2 -; LA64-NEXT: b .LBB2_1 -; LA64-NEXT: .LBB2_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB2_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp ne i32 %val, %a br i1 %cc, label %end, label %test @@ -82,22 +79,20 @@ define void @foo_br_slt(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 
; LA32-NEXT: blt $a2, $a0, .LBB3_2 -; LA32-NEXT: b .LBB3_1 -; LA32-NEXT: .LBB3_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB3_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_slt: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a2, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: blt $a2, $a0, .LBB3_2 -; LA64-NEXT: b .LBB3_1 -; LA64-NEXT: .LBB3_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB3_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp slt i32 %val, %a br i1 %cc, label %end, label %test @@ -114,22 +109,20 @@ define void @foo_br_sge(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bge $a2, $a0, .LBB4_2 -; LA32-NEXT: b .LBB4_1 -; LA32-NEXT: .LBB4_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB4_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_sge: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a2, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: bge $a2, $a0, .LBB4_2 -; LA64-NEXT: b .LBB4_1 -; LA64-NEXT: .LBB4_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB4_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp sge i32 %val, %a br i1 %cc, label %end, label %test @@ -146,22 +139,20 @@ define void @foo_br_ult(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bltu $a2, $a0, .LBB5_2 -; LA32-NEXT: b .LBB5_1 -; LA32-NEXT: .LBB5_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB5_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_ult: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: bltu $a2, $a0, .LBB5_2 -; LA64-NEXT: b .LBB5_1 -; LA64-NEXT: 
.LBB5_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB5_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp ult i32 %val, %a br i1 %cc, label %end, label %test @@ -178,22 +169,20 @@ define void @foo_br_uge(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bgeu $a2, $a0, .LBB6_2 -; LA32-NEXT: b .LBB6_1 -; LA32-NEXT: .LBB6_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB6_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_uge: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: bgeu $a2, $a0, .LBB6_2 -; LA64-NEXT: b .LBB6_1 -; LA64-NEXT: .LBB6_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB6_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp uge i32 %val, %a br i1 %cc, label %end, label %test @@ -211,22 +200,20 @@ define void @foo_br_sgt(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: blt $a0, $a2, .LBB7_2 -; LA32-NEXT: b .LBB7_1 -; LA32-NEXT: .LBB7_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB7_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_sgt: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a2, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: blt $a0, $a2, .LBB7_2 -; LA64-NEXT: b .LBB7_1 -; LA64-NEXT: .LBB7_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB7_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp sgt i32 %val, %a br i1 %cc, label %end, label %test @@ -243,22 +230,20 @@ define void @foo_br_sle(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bge $a0, $a2, .LBB8_2 -; 
LA32-NEXT: b .LBB8_1 -; LA32-NEXT: .LBB8_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB8_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_sle: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a2, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: bge $a0, $a2, .LBB8_2 -; LA64-NEXT: b .LBB8_1 -; LA64-NEXT: .LBB8_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB8_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp sle i32 %val, %a br i1 %cc, label %end, label %test @@ -275,22 +260,20 @@ define void @foo_br_ugt(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bltu $a0, $a2, .LBB9_2 -; LA32-NEXT: b .LBB9_1 -; LA32-NEXT: .LBB9_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB9_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_ugt: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: bltu $a0, $a2, .LBB9_2 -; LA64-NEXT: b .LBB9_1 -; LA64-NEXT: .LBB9_1: # %test +; LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB9_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp ugt i32 %val, %a br i1 %cc, label %end, label %test @@ -307,22 +290,20 @@ define void @foo_br_ule(i32 %a, ptr %b) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a2, $a1, 0 ; LA32-NEXT: bgeu $a0, $a2, .LBB10_2 -; LA32-NEXT: b .LBB10_1 -; LA32-NEXT: .LBB10_1: # %test +; LA32-NEXT: # %bb.1: # %test ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: .LBB10_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: foo_br_ule: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a2, $a1, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: bgeu $a0, $a2, .LBB10_2 -; LA64-NEXT: b .LBB10_1 -; LA64-NEXT: .LBB10_1: # %test +; 
LA64-NEXT: # %bb.1: # %test ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: .LBB10_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load volatile i32, ptr %b %cc = icmp ule i32 %val, %a br i1 %cc, label %end, label %test @@ -342,11 +323,10 @@ define void @foo_br_cc(ptr %a, i1 %cc) nounwind { ; ALL-NEXT: ld.w $a2, $a0, 0 ; ALL-NEXT: andi $a1, $a1, 1 ; ALL-NEXT: bnez $a1, .LBB11_2 -; ALL-NEXT: b .LBB11_1 -; ALL-NEXT: .LBB11_1: # %test +; ALL-NEXT: # %bb.1: # %test ; ALL-NEXT: ld.w $a0, $a0, 0 ; ALL-NEXT: .LBB11_2: # %end -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret %val = load volatile i32, ptr %a br i1 %cc, label %end, label %test test: diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll index 596ea22e5854e9c72e36d8a06f1b43eaad12fdef..90ee9490de74e89647cf9e0fad86ecd687e27d70 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s ; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s @@ -8,19 +9,19 @@ define i32 @test_call_external(i32 %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl external_function +; LA32-NEXT: bl %plt(external_function) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: test_call_external: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -16 ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: bl external_function +; LA64-NEXT: bl %plt(external_function) ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = call i32 
@external_function(i32 %a) ret i32 %1 } @@ -29,12 +30,12 @@ define i32 @defined_function(i32 %a) nounwind { ; LA32-LABEL: defined_function: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: defined_function: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = add i32 %a, 1 ret i32 %1 } @@ -44,19 +45,19 @@ define i32 @test_call_defined(i32 %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl defined_function +; LA32-NEXT: bl %plt(defined_function) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: test_call_defined: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -16 ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: bl defined_function +; LA64-NEXT: bl %plt(defined_function) ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = call i32 @defined_function(i32 %a) nounwind ret i32 %1 } @@ -71,7 +72,7 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { ; LA32-NEXT: jirl $ra, $a2, 0 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: test_call_indirect: ; LA64: # %bb.0: @@ -82,7 +83,7 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { ; LA64-NEXT: jirl $ra, $a2, 0 ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = call i32 %a(i32 %b) ret i32 %1 } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll index 
33f6dbee748effcce10eb2d0e7742b9a238d586b..03a126a736efc302dc7965eaaad569677357810f 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll @@ -6,12 +6,12 @@ define float @convert_double_to_float(double %a) nounwind { ; LA32-LABEL: convert_double_to_float: ; LA32: # %bb.0: ; LA32-NEXT: fcvt.s.d $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_double_to_float: ; LA64: # %bb.0: ; LA64-NEXT: fcvt.s.d $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fptrunc double %a to float ret float %1 } @@ -20,12 +20,12 @@ define double @convert_float_to_double(float %a) nounwind { ; LA32-LABEL: convert_float_to_double: ; LA32: # %bb.0: ; LA32-NEXT: fcvt.d.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_float_to_double: ; LA64: # %bb.0: ; LA64-NEXT: fcvt.d.s $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fpext float %a to double ret double %1 } @@ -35,13 +35,13 @@ define double @convert_i8_to_double(i8 signext %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_i8_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sitofp i8 %a to double ret double %1 } @@ -51,13 +51,13 @@ define double @convert_i16_to_double(i16 signext %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_i16_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sitofp i16 %a to double ret double %1 } @@ -67,13 +67,13 @@ define double @convert_i32_to_double(i32 %a) nounwind { ; LA32: 
# %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_i32_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sitofp i32 %a to double ret double %1 } @@ -83,16 +83,16 @@ define double @convert_i64_to_double(i64 %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __floatdidf +; LA32-NEXT: bl %plt(__floatdidf) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_i64_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa0, $a0 ; LA64-NEXT: ffint.d.l $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sitofp i64 %a to double ret double %1 } @@ -102,13 +102,13 @@ define i32 @convert_double_to_i32(double %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: ftintrz.w.d $fa0, $fa0 ; LA32-NEXT: movfr2gr.s $a0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_double_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: ftintrz.w.d $fa0, $fa0 ; LA64-NEXT: movfr2gr.s $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 } @@ -116,8 +116,8 @@ define i32 @convert_double_to_i32(double %a) nounwind { define i32 @convert_double_to_u32(double %a) nounwind { ; LA32-LABEL: convert_double_to_u32: ; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, .LCPI7_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI7_0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI7_0) ; LA32-NEXT: fld.d $fa1, $a0, 0 ; LA32-NEXT: fsub.d $fa2, $fa0, $fa1 ; LA32-NEXT: ftintrz.w.d $fa2, $fa2 @@ -131,13 +131,13 @@ define i32 @convert_double_to_u32(double %a) nounwind { ; LA32-NEXT: movfr2gr.s $a2, $fa0 ; LA32-NEXT: maskeqz $a1, $a2, $a1 
; LA32-NEXT: or $a0, $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_double_to_u32: ; LA64: # %bb.0: ; LA64-NEXT: ftintrz.l.d $fa0, $fa0 ; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 } @@ -147,16 +147,16 @@ define i64 @convert_double_to_i64(double %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __fixdfdi +; LA32-NEXT: bl %plt(__fixdfdi) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_double_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: ftintrz.l.d $fa0, $fa0 ; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fptosi double %a to i64 ret i64 %1 } @@ -166,15 +166,15 @@ define i64 @convert_double_to_u64(double %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __fixunsdfdi +; LA32-NEXT: bl %plt(__fixunsdfdi) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_double_to_u64: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, .LCPI9_0 -; LA64-NEXT: addi.d $a0, $a0, .LCPI9_0 +; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0) +; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI9_0) ; LA64-NEXT: fld.d $fa1, $a0, 0 ; LA64-NEXT: fsub.d $fa2, $fa0, $fa1 ; LA64-NEXT: ftintrz.l.d $fa2, $fa2 @@ -188,7 +188,7 @@ define i64 @convert_double_to_u64(double %a) nounwind { ; LA64-NEXT: movfr2gr.d $a2, $fa0 ; LA64-NEXT: maskeqz $a1, $a2, $a1 ; LA64-NEXT: or $a0, $a1, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = fptoui double %a to i64 ret i64 %1 } @@ -198,13 +198,13 @@ define double @convert_u8_to_double(i8 zeroext %a) nounwind { ; LA32: # %bb.0: ; 
LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_u8_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = uitofp i8 %a to double ret double %1 } @@ -214,13 +214,13 @@ define double @convert_u16_to_double(i16 zeroext %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa0, $a0 ; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_u16_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa0, $a0 ; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = uitofp i16 %a to double ret double %1 } @@ -229,32 +229,23 @@ define double @convert_u32_to_double(i32 %a) nounwind { ; LA32-LABEL: convert_u32_to_double: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: addi.w $a1, $sp, 8 -; LA32-NEXT: ori $a1, $a1, 4 -; LA32-NEXT: lu12i.w $a2, 275200 -; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: lu12i.w $a1, 275200 +; LA32-NEXT: st.w $a1, $sp, 12 ; LA32-NEXT: st.w $a0, $sp, 8 -; LA32-NEXT: pcalau12i $a0, .LCPI12_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI12_0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0) +; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI12_0) ; LA32-NEXT: fld.d $fa0, $a0, 0 ; LA32-NEXT: fld.d $fa1, $sp, 8 ; LA32-NEXT: fsub.d $fa0, $fa1, $fa0 ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_u32_to_double: ; LA64: # %bb.0: -; LA64-NEXT: lu52i.d $a1, $zero, 1107 -; LA64-NEXT: movgr2fr.d $fa0, $a1 -; LA64-NEXT: pcalau12i $a1, .LCPI12_0 -; LA64-NEXT: addi.d $a1, $a1, .LCPI12_0 -; LA64-NEXT: fld.d $fa1, $a1, 0 -; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 -; LA64-NEXT: lu12i.w $a1, 275200 -; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; 
LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ffint.d.l $fa0, $fa0 +; LA64-NEXT: ret %1 = uitofp i32 %a to double ret double %1 } @@ -264,10 +255,10 @@ define double @convert_u64_to_double(i64 %a) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __floatundidf +; LA32-NEXT: bl %plt(__floatundidf) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: convert_u64_to_double: ; LA64: # %bb.0: @@ -275,15 +266,15 @@ define double @convert_u64_to_double(i64 %a) nounwind { ; LA64-NEXT: lu52i.d $a2, $zero, 1107 ; LA64-NEXT: or $a1, $a1, $a2 ; LA64-NEXT: movgr2fr.d $fa0, $a1 -; LA64-NEXT: pcalau12i $a1, .LCPI13_0 -; LA64-NEXT: addi.d $a1, $a1, .LCPI13_0 +; LA64-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) +; LA64-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) ; LA64-NEXT: fld.d $fa1, $a1, 0 ; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 ; LA64-NEXT: lu12i.w $a1, 275200 ; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 ; LA64-NEXT: movgr2fr.d $fa1, $a0 ; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = uitofp i64 %a to double ret double %1 } @@ -292,18 +283,16 @@ define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind { ; LA32-LABEL: bitcast_i64_to_double: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: addi.w $a2, $sp, 8 -; LA32-NEXT: ori $a2, $a2, 4 -; LA32-NEXT: st.w $a1, $a2, 0 +; LA32-NEXT: st.w $a1, $sp, 12 ; LA32-NEXT: st.w $a0, $sp, 8 ; LA32-NEXT: fld.d $fa0, $sp, 8 ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bitcast_i64_to_double: ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = bitcast i64 %a to double ret double %1 } @@ -313,17 +302,15 @@ define i64 @bitcast_double_to_i64(double %a) nounwind { ; 
LA32: # %bb.0: ; LA32-NEXT: addi.w $sp, $sp, -16 ; LA32-NEXT: fst.d $fa0, $sp, 8 -; LA32-NEXT: addi.w $a0, $sp, 8 -; LA32-NEXT: ori $a0, $a0, 4 -; LA32-NEXT: ld.w $a1, $a0, 0 ; LA32-NEXT: ld.w $a0, $sp, 8 +; LA32-NEXT: ld.w $a1, $sp, 12 ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bitcast_double_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = bitcast double %a to i64 ret i64 %1 } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll index 15e1118d2e56051731d62d6b82ae893893aee35a..0c509297e219544e4169df74305ebf66cecac368 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fadd_s(float %x, float %y) { ; LA32-LABEL: fadd_s: ; LA32: # %bb.0: ; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fadd_s: ; LA64: # %bb.0: ; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = fadd float %x, %y ret float %add } @@ -21,12 +22,12 @@ define double @fadd_d(double %x, double %y) { ; LA32-LABEL: fadd_d: ; LA32: # %bb.0: ; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fadd_d: ; LA64: # %bb.0: ; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %add = fadd double %x, %y ret double %add } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll index 
bb35405abc0182e1bb2b1a888db7acfe2350d60c..8058f7b0810ce38ea53ae70bf50bbdba76f43edc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -8,12 +9,12 @@ define i1 @fcmp_false(double %a, double %b) { ; LA32-LABEL: fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false double %a, %b ret i1 %cmp } @@ -23,13 +24,13 @@ define i1 @fcmp_oeq(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oeq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq double %a, %b ret i1 %cmp } @@ -39,13 +40,13 @@ define i1 @fcmp_ogt(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ogt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt double %a, %b ret i1 %cmp } @@ -55,13 +56,13 @@ define i1 @fcmp_oge(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 
-; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge double %a, %b ret i1 %cmp } @@ -71,13 +72,13 @@ define i1 @fcmp_olt(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_olt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt double %a, %b ret i1 %cmp } @@ -87,13 +88,13 @@ define i1 @fcmp_ole(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ole: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ole double %a, %b ret i1 %cmp } @@ -103,13 +104,13 @@ define i1 @fcmp_one(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_one: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one double %a, %b ret i1 %cmp } @@ -119,13 +120,13 @@ define i1 @fcmp_ord(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ord: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ord double %a, %b ret i1 %cmp } @@ -135,13 +136,13 @@ define i1 @fcmp_ueq(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; 
LA64-LABEL: fcmp_ueq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq double %a, %b ret i1 %cmp } @@ -151,13 +152,13 @@ define i1 @fcmp_ugt(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ugt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt double %a, %b ret i1 %cmp } @@ -167,13 +168,13 @@ define i1 @fcmp_uge(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge double %a, %b ret i1 %cmp } @@ -183,13 +184,13 @@ define i1 @fcmp_ult(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ult: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult double %a, %b ret i1 %cmp } @@ -199,13 +200,13 @@ define i1 @fcmp_ule(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ule: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule double %a, %b ret i1 %cmp } @@ -215,13 +216,13 @@ define i1 @fcmp_une(double %a, double %b) { ; LA32: # %bb.0: ; 
LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_une: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une double %a, %b ret i1 %cmp } @@ -231,13 +232,13 @@ define i1 @fcmp_uno(double %a, double %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uno: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno double %a, %b ret i1 %cmp } @@ -246,12 +247,128 @@ define i1 @fcmp_true(double %a, double %b) { ; LA32-LABEL: fcmp_true: ; LA32: # %bb.0: ; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_true: ; LA64: # %bb.0: ; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true double %a, %b ret i1 %cmp } + +define i1 @fcmp_fast_olt(double %a, double %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_olt: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a1, $fcc0 +; LA32-NEXT: bnez $a1, .LBB16_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB16_2: # %if.else +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_fast_olt: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.d $fa1, $zero +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a1, $fcc0 +; LA64-NEXT: bnez $a1, .LBB16_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB16_2: # %if.else +; LA64-NEXT: fcmp.clt.d $fcc0, 
$fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast olt double %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} + +define i1 @fcmp_fast_oeq(double %a, double %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_oeq: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a1, $fcc0 +; LA32-NEXT: xori $a1, $a1, 1 +; LA32-NEXT: bnez $a1, .LBB17_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB17_2: # %if.else +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_fast_oeq: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.d $fa1, $zero +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a1, $fcc0 +; LA64-NEXT: xori $a1, $a1, 1 +; LA64-NEXT: bnez $a1, .LBB17_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB17_2: # %if.else +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast oeq double %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} + +define i1 @fcmp_fast_ole(double %a, double %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_ole: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB18_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB18_2: # %if.else +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: movgr2frh.w $fa1, $zero +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_fast_ole: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.d $fa1, $zero +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: bcnez $fcc0, 
.LBB18_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB18_2: # %if.else +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast ole double %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll index 33bdd0b50bd49cc064483bb14fac8b486cb904dc..d0f8d5342280df8215c15c9318a68c316f1e1fd0 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 @@ -8,12 +9,12 @@ define i1 @fcmp_false(float %a, float %b) { ; LA32-LABEL: fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false float %a, %b ret i1 %cmp } @@ -23,13 +24,13 @@ define i1 @fcmp_oeq(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oeq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq float %a, %b ret i1 %cmp } @@ -39,13 +40,13 @@ define i1 @fcmp_ogt(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ogt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.s $fcc0, 
$fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt float %a, %b ret i1 %cmp } @@ -55,13 +56,13 @@ define i1 @fcmp_oge(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge float %a, %b ret i1 %cmp } @@ -71,13 +72,13 @@ define i1 @fcmp_olt(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_olt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt float %a, %b ret i1 %cmp } @@ -87,13 +88,13 @@ define i1 @fcmp_ole(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ole: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ole float %a, %b ret i1 %cmp } @@ -103,13 +104,13 @@ define i1 @fcmp_one(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_one: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one float %a, %b ret i1 %cmp } @@ -119,13 +120,13 @@ define i1 @fcmp_ord(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; 
LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ord: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ord float %a, %b ret i1 %cmp } @@ -135,13 +136,13 @@ define i1 @fcmp_ueq(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ueq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq float %a, %b ret i1 %cmp } @@ -151,13 +152,13 @@ define i1 @fcmp_ugt(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ugt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt float %a, %b ret i1 %cmp } @@ -167,13 +168,13 @@ define i1 @fcmp_uge(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge float %a, %b ret i1 %cmp } @@ -183,13 +184,13 @@ define i1 @fcmp_ult(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ult: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult float %a, %b ret i1 %cmp } @@ -199,13 +200,13 @@ define i1 @fcmp_ule(float %a, float %b) { ; LA32: # %bb.0: ; 
LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ule: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule float %a, %b ret i1 %cmp } @@ -215,13 +216,13 @@ define i1 @fcmp_une(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_une: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une float %a, %b ret i1 %cmp } @@ -231,13 +232,13 @@ define i1 @fcmp_uno(float %a, float %b) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uno: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 ; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno float %a, %b ret i1 %cmp } @@ -246,12 +247,118 @@ define i1 @fcmp_true(float %a, float %b) { ; LA32-LABEL: fcmp_true: ; LA32: # %bb.0: ; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_true: ; LA64: # %bb.0: ; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true float %a, %b ret i1 %cmp } + +define i1 @fcmp_fast_olt(float %a, float %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_olt: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a1, $fcc0 +; LA32-NEXT: bnez $a1, .LBB16_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB16_2: # %if.else +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: 
fcmp_fast_olt: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.w $fa1, $zero +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a1, $fcc0 +; LA64-NEXT: bnez $a1, .LBB16_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB16_2: # %if.else +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast olt float %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} + +define i1 @fcmp_fast_oeq(float %a, float %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_oeq: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a1, $fcc0 +; LA32-NEXT: xori $a1, $a1, 1 +; LA32-NEXT: bnez $a1, .LBB17_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB17_2: # %if.else +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_fast_oeq: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.w $fa1, $zero +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a1, $fcc0 +; LA64-NEXT: xori $a1, $a1, 1 +; LA64-NEXT: bnez $a1, .LBB17_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB17_2: # %if.else +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast oeq float %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} + +define i1 @fcmp_fast_ole(float %a, float %b, i1 %c) nounwind { +; LA32-LABEL: fcmp_fast_ole: +; LA32: # %bb.0: +; LA32-NEXT: movgr2fr.w $fa1, $zero +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: bcnez $fcc0, .LBB18_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: ret +; LA32-NEXT: .LBB18_2: # %if.else +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_fast_ole: +; LA64: # %bb.0: +; LA64-NEXT: movgr2fr.w $fa1, $zero +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; 
LA64-NEXT: bcnez $fcc0, .LBB18_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: ret +; LA64-NEXT: .LBB18_2: # %if.else +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %cmp = fcmp fast ole float %a, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll index 9c3f85950d5d43e480a7c702221b420ddf03330c..e3154122c90d807db92cc4ec8da2a5aa78e11124 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fdiv_s(float %x, float %y) { ; LA32-LABEL: fdiv_s: ; LA32: # %bb.0: ; LA32-NEXT: fdiv.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fdiv_s: ; LA64: # %bb.0: ; LA64-NEXT: fdiv.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %div = fdiv float %x, %y ret float %div } @@ -21,12 +22,12 @@ define double @fdiv_d(double %x, double %y) { ; LA32-LABEL: fdiv_d: ; LA32: # %bb.0: ; LA32-NEXT: fdiv.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fdiv_d: ; LA64: # %bb.0: ; LA64-NEXT: fdiv.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %div = fdiv double %x, %y ret double %div } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll index f8c98bbc713845cd078912970b446f3b76525037..724639f3c6fb9f02a7cf5ffeb0c057ee2ad94519 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -5,12 +6,12 @@ define void @fence_acquire() nounwind { ; LA32-LABEL: fence_acquire: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fence_acquire: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret fence acquire ret void } @@ -19,12 +20,12 @@ define void @fence_release() nounwind { ; LA32-LABEL: fence_release: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fence_release: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret fence release ret void } @@ -33,12 +34,12 @@ define void @fence_acq_rel() nounwind { ; LA32-LABEL: fence_acq_rel: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fence_acq_rel: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret fence acq_rel ret void } @@ -47,12 +48,12 @@ define void @fence_seq_cst() nounwind { ; LA32-LABEL: fence_seq_cst: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fence_seq_cst: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret fence seq_cst ret void } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll index 30e0045a14674c8e761348b80fbe14092b63454d..b57d96aee32f495603278ac01f78444a8db9eb92 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll @@ -9,25 +9,25 @@ define signext i8 
@convert_float_to_i8(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_i8: ; LA32D: # %bb.0: ; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_i8: ; LA64F: # %bb.0: ; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_i8: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptosi float %a to i8 ret i8 %1 } @@ -37,25 +37,25 @@ define signext i16 @convert_float_to_i16(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_i16: ; LA32D: # %bb.0: ; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_i16: ; LA64F: # %bb.0: ; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_i16: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptosi float %a to i16 ret i16 %1 } @@ -65,25 +65,25 @@ define i32 @convert_float_to_i32(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_i32: ; LA32D: # %bb.0: ; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: 
convert_float_to_i32: ; LA64F: # %bb.0: ; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_i32: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.s $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 } @@ -93,31 +93,31 @@ define i64 @convert_float_to_i64(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: addi.w $sp, $sp, -16 ; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: bl __fixsfdi +; LA32F-NEXT: bl %plt(__fixsfdi) ; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32F-NEXT: addi.w $sp, $sp, 16 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_i64: ; LA32D: # %bb.0: ; LA32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-NEXT: bl __fixsfdi +; LA32D-NEXT: bl %plt(__fixsfdi) ; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32D-NEXT: addi.w $sp, $sp, 16 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_i64: ; LA64F: # %bb.0: ; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_i64: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptosi float %a to i64 ret i64 %1 } @@ -127,25 +127,25 @@ define zeroext i8 @convert_float_to_u8(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_u8: ; LA32D: # %bb.0: ; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_u8: ; LA64F: # %bb.0: ; 
LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_u8: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptoui float %a to i8 ret i8 %1 } @@ -155,25 +155,25 @@ define zeroext i16 @convert_float_to_u16(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_u16: ; LA32D: # %bb.0: ; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_u16: ; LA64F: # %bb.0: ; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_u16: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptoui float %a to i16 ret i16 %1 } @@ -181,8 +181,8 @@ define zeroext i16 @convert_float_to_u16(float %a) nounwind { define i32 @convert_float_to_u32(float %a) nounwind { ; LA32F-LABEL: convert_float_to_u32: ; LA32F: # %bb.0: -; LA32F-NEXT: pcalau12i $a0, .LCPI6_0 -; LA32F-NEXT: addi.w $a0, $a0, .LCPI6_0 +; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; LA32F-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI6_0) ; LA32F-NEXT: fld.s $fa1, $a0, 0 ; LA32F-NEXT: fsub.s $fa2, $fa0, $fa1 ; LA32F-NEXT: ftintrz.w.s $fa2, $fa2 @@ -196,12 +196,12 @@ define i32 @convert_float_to_u32(float %a) nounwind { ; LA32F-NEXT: movfr2gr.s $a2, $fa0 ; LA32F-NEXT: maskeqz $a1, $a2, $a1 ; LA32F-NEXT: or $a0, $a1, $a0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_u32: ; LA32D: # %bb.0: -; LA32D-NEXT: pcalau12i $a0, .LCPI6_0 -; LA32D-NEXT: addi.w $a0, $a0, 
.LCPI6_0 +; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; LA32D-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI6_0) ; LA32D-NEXT: fld.s $fa1, $a0, 0 ; LA32D-NEXT: fsub.s $fa2, $fa0, $fa1 ; LA32D-NEXT: ftintrz.w.s $fa2, $fa2 @@ -215,12 +215,12 @@ define i32 @convert_float_to_u32(float %a) nounwind { ; LA32D-NEXT: movfr2gr.s $a2, $fa0 ; LA32D-NEXT: maskeqz $a1, $a2, $a1 ; LA32D-NEXT: or $a0, $a1, $a0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_float_to_u32: ; LA64F: # %bb.0: -; LA64F-NEXT: pcalau12i $a0, .LCPI6_0 -; LA64F-NEXT: addi.d $a0, $a0, .LCPI6_0 +; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) +; LA64F-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI6_0) ; LA64F-NEXT: fld.s $fa1, $a0, 0 ; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1 ; LA64F-NEXT: ftintrz.w.s $fa2, $fa2 @@ -234,13 +234,13 @@ define i32 @convert_float_to_u32(float %a) nounwind { ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ; LA64F-NEXT: maskeqz $a1, $a2, $a1 ; LA64F-NEXT: or $a0, $a1, $a0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_u32: ; LA64D: # %bb.0: ; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 ; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 } @@ -250,24 +250,24 @@ define i64 @convert_float_to_u64(float %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: addi.w $sp, $sp, -16 ; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: bl __fixunssfdi +; LA32F-NEXT: bl %plt(__fixunssfdi) ; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32F-NEXT: addi.w $sp, $sp, 16 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_float_to_u64: ; LA32D: # %bb.0: ; LA32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-NEXT: bl __fixunssfdi +; LA32D-NEXT: bl %plt(__fixunssfdi) ; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32D-NEXT: addi.w $sp, $sp, 16 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; 
LA64F-LABEL: convert_float_to_u64: ; LA64F: # %bb.0: -; LA64F-NEXT: pcalau12i $a0, .LCPI7_0 -; LA64F-NEXT: addi.d $a0, $a0, .LCPI7_0 +; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA64F-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI7_0) ; LA64F-NEXT: fld.s $fa1, $a0, 0 ; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1 ; LA64F-NEXT: ftintrz.w.s $fa2, $fa2 @@ -281,12 +281,12 @@ define i64 @convert_float_to_u64(float %a) nounwind { ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ; LA64F-NEXT: maskeqz $a1, $a2, $a1 ; LA64F-NEXT: or $a0, $a1, $a0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_float_to_u64: ; LA64D: # %bb.0: -; LA64D-NEXT: pcalau12i $a0, .LCPI7_0 -; LA64D-NEXT: addi.d $a0, $a0, .LCPI7_0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI7_0) ; LA64D-NEXT: fld.s $fa1, $a0, 0 ; LA64D-NEXT: fsub.s $fa2, $fa0, $fa1 ; LA64D-NEXT: ftintrz.l.s $fa2, $fa2 @@ -300,7 +300,7 @@ define i64 @convert_float_to_u64(float %a) nounwind { ; LA64D-NEXT: movfr2gr.d $a2, $fa0 ; LA64D-NEXT: maskeqz $a1, $a2, $a1 ; LA64D-NEXT: or $a0, $a1, $a0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = fptoui float %a to i64 ret i64 %1 } @@ -310,25 +310,25 @@ define float @convert_i8_to_float(i8 signext %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_i8_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_i8_to_float: ; LA64F: # %bb.0: ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i8_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = sitofp i8 %a to float ret float %1 } @@ -338,25 
+338,25 @@ define float @convert_i16_to_float(i16 signext %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_i16_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_i16_to_float: ; LA64F: # %bb.0: ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i16_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = sitofp i16 %a to float ret float %1 } @@ -366,27 +366,25 @@ define float @convert_i32_to_float(i32 %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_i32_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_i32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.w $a0, $a0, 0 ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.w $a0, $a0, 0 ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = sitofp i32 %a to float ret float %1 } @@ -396,31 +394,34 @@ define float @convert_i64_to_float(i64 %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: addi.w $sp, $sp, -16 ; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: bl __floatdisf +; LA32F-NEXT: bl %plt(__floatdisf) ; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32F-NEXT: addi.w $sp, $sp, 
16 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_i64_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-NEXT: bl __floatdisf +; LA32D-NEXT: bl %plt(__floatdisf) ; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32D-NEXT: addi.w $sp, $sp, 16 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_i64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: movgr2fr.w $fa0, $a0 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatdisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i64_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: movgr2fr.w $fa0, $a0 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 +; LA64D-NEXT: ret %1 = sitofp i64 %a to float ret float %1 } @@ -430,25 +431,25 @@ define float @convert_u8_to_float(i8 zeroext %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_u8_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_u8_to_float: ; LA64F: # %bb.0: ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u8_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = uitofp i8 %a to float ret float %1 } @@ -458,25 +459,25 @@ define float @convert_u16_to_float(i16 zeroext %a) nounwind { ; LA32F: 
# %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 ; LA32F-NEXT: ffint.s.w $fa0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_u16_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 ; LA32D-NEXT: ffint.s.w $fa0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_u16_to_float: ; LA64F: # %bb.0: ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u16_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = uitofp i16 %a to float ret float %1 } @@ -491,60 +492,43 @@ define float @convert_u32_to_float(i32 %a) nounwind { ; LA32F-NEXT: ffint.s.w $fa0, $fa0 ; LA32F-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA32F-NEXT: slti $a1, $a0, 0 -; LA32F-NEXT: movgr2cf $fcc0, $a1 ; LA32F-NEXT: movgr2fr.w $fa1, $a0 ; LA32F-NEXT: ffint.s.w $fa1, $fa1 +; LA32F-NEXT: movgr2cf $fcc0, $a1 ; LA32F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_u32_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: addi.w $sp, $sp, -16 -; LA32D-NEXT: addi.w $a1, $sp, 8 -; LA32D-NEXT: ori $a1, $a1, 4 -; LA32D-NEXT: lu12i.w $a2, 275200 -; LA32D-NEXT: st.w $a2, $a1, 0 +; LA32D-NEXT: lu12i.w $a1, 275200 +; LA32D-NEXT: st.w $a1, $sp, 12 ; LA32D-NEXT: st.w $a0, $sp, 8 -; LA32D-NEXT: pcalau12i $a0, .LCPI14_0 -; LA32D-NEXT: addi.w $a0, $a0, .LCPI14_0 +; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0) +; LA32D-NEXT: addi.w $a0, $a0, %pc_lo12(.LCPI14_0) ; LA32D-NEXT: fld.d $fa0, $a0, 0 ; LA32D-NEXT: fld.d $fa1, $sp, 8 ; LA32D-NEXT: fsub.d $fa0, $fa1, $fa0 ; LA32D-NEXT: fcvt.s.d $fa0, $fa0 ; LA32D-NEXT: addi.w $sp, $sp, 16 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_u32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; 
LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; LA64F-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64D-NEXT: andi $a2, $a0, 1 -; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64D-NEXT: slti $a1, $a0, 0 -; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 -; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 +; LA64D-NEXT: ret %1 = uitofp i32 %a to float ret float %1 } @@ -554,49 +538,43 @@ define float @convert_u64_to_float(i64 %a) nounwind { ; LA32F: # %bb.0: ; LA32F-NEXT: addi.w $sp, $sp, -16 ; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32F-NEXT: bl __floatundisf +; LA32F-NEXT: bl %plt(__floatundisf) ; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32F-NEXT: addi.w $sp, $sp, 16 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: convert_u64_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: addi.w $sp, $sp, -16 ; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32D-NEXT: bl __floatundisf +; LA32D-NEXT: bl %plt(__floatundisf) ; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32D-NEXT: addi.w $sp, $sp, 
16 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: convert_u64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: srli.d $a1, $a0, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u64_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: srli.d $a1, $a0, 1 ; LA64D-NEXT: andi $a2, $a0, 1 ; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: movgr2fr.d $fa0, $a1 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: slti $a1, $a0, 0 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.s.l $fa1, $fa1 ; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 ; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = uitofp i64 %a to float ret float %1 } @@ -605,22 +583,22 @@ define i32 @bitcast_float_to_i32(float %a) nounwind { ; LA32F-LABEL: bitcast_float_to_i32: ; LA32F: # %bb.0: ; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: bitcast_float_to_i32: ; LA32D: # %bb.0: ; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: bitcast_float_to_i32: ; LA64F: # %bb.0: ; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: 
bitcast_float_to_i32: ; LA64D: # %bb.0: ; LA64D-NEXT: movfr2gr.s $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = bitcast float %a to i32 ret i32 %1 } @@ -629,22 +607,22 @@ define float @bitcast_i32_to_float(i32 %a) nounwind { ; LA32F-LABEL: bitcast_i32_to_float: ; LA32F: # %bb.0: ; LA32F-NEXT: movgr2fr.w $fa0, $a0 -; LA32F-NEXT: jirl $zero, $ra, 0 +; LA32F-NEXT: ret ; ; LA32D-LABEL: bitcast_i32_to_float: ; LA32D: # %bb.0: ; LA32D-NEXT: movgr2fr.w $fa0, $a0 -; LA32D-NEXT: jirl $zero, $ra, 0 +; LA32D-NEXT: ret ; ; LA64F-LABEL: bitcast_i32_to_float: ; LA64F: # %bb.0: ; LA64F-NEXT: movgr2fr.w $fa0, $a0 -; LA64F-NEXT: jirl $zero, $ra, 0 +; LA64F-NEXT: ret ; ; LA64D-LABEL: bitcast_i32_to_float: ; LA64D: # %bb.0: ; LA64D-NEXT: movgr2fr.w $fa0, $a0 -; LA64D-NEXT: jirl $zero, $ra, 0 +; LA64D-NEXT: ret %1 = bitcast i32 %a to float ret float %1 } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll index 78ee031c13015675e06ce4e462fad8cbdea00414..d3acb566c2a2e942cca9e960c0a66618c58867a9 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fmul_s(float %x, float %y) { ; LA32-LABEL: fmul_s: ; LA32: # %bb.0: ; LA32-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fmul_s: ; LA64: # %bb.0: ; LA64-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %mul = fmul float %x, %y ret float %mul } @@ -21,12 +22,12 @@ define double @fmul_d(double %x, double %y) { ; LA32-LABEL: fmul_d: ; LA32: # %bb.0: ; LA32-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; 
LA32-NEXT: ret ; ; LA64-LABEL: fmul_d: ; LA64: # %bb.0: ; LA64-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %mul = fmul double %x, %y ret double %mul } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll index 3a8a4127d8e7a56dab4abd0ec8b3eb1442d0af57..da1952654191b4ddb6dc426447aad543a2780868 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fneg_s(float %x) { ; LA32-LABEL: fneg_s: ; LA32: # %bb.0: ; LA32-NEXT: fneg.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fneg_s: ; LA64: # %bb.0: ; LA64-NEXT: fneg.s $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %neg = fneg float %x ret float %neg } @@ -21,12 +22,12 @@ define double @fneg_d(double %x) { ; LA32-LABEL: fneg_d: ; LA32: # %bb.0: ; LA32-NEXT: fneg.d $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fneg_d: ; LA64: # %bb.0: ; LA64-NEXT: fneg.d $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %neg = fneg double %x ret double %neg } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll index 9ddf583d999c8df83d3370e22781b1369318fdc5..0aa0d634f183be9515bad813afc731796c0202f0 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d 
< %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fsub_s(float %x, float %y) { ; LA32-LABEL: fsub_s: ; LA32: # %bb.0: ; LA32-NEXT: fsub.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fsub_s: ; LA64: # %bb.0: ; LA64-NEXT: fsub.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = fsub float %x, %y ret float %sub } @@ -21,12 +22,12 @@ define double @fsub_d(double %x, double %y) { ; LA32-LABEL: fsub_d: ; LA32: # %bb.0: ; LA32-NEXT: fsub.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fsub_d: ; LA64: # %bb.0: ; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = fsub double %x, %y ret double %sub } @@ -35,12 +36,12 @@ define float @fneg_s(float %x) { ; LA32-LABEL: fneg_s: ; LA32: # %bb.0: ; LA32-NEXT: fneg.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fneg_s: ; LA64: # %bb.0: ; LA64-NEXT: fneg.s $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = fsub float -0.0, %x ret float %res } @@ -49,12 +50,12 @@ define double @fneg_d(double %x) { ; LA32-LABEL: fneg_d: ; LA32: # %bb.0: ; LA32-NEXT: fneg.d $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fneg_d: ; LA64: # %bb.0: ; LA64-NEXT: fneg.d $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = fsub double -0.0, %x ret double %res } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll index 947886e6b9dc5b0abecbf23bacb2734913c2b818..605b3ab293787db094d5fce832f352465c2b7108 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s 
--check-prefix=LA64 @@ -8,13 +9,13 @@ define i1 @icmp_eq(i32 signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: xor $a0, $a0, $a1 ; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_eq: ; LA64: # %bb.0: ; LA64-NEXT: xor $a0, $a0, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp eq i32 %a, %b ret i1 %res } @@ -24,13 +25,13 @@ define i1 @icmp_ne(i32 signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: xor $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ne: ; LA64: # %bb.0: ; LA64-NEXT: xor $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ne i32 %a, %b ret i1 %res } @@ -39,12 +40,12 @@ define i1 @icmp_ugt(i32 signext %a, i32 signext %b) { ; LA32-LABEL: icmp_ugt: ; LA32: # %bb.0: ; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ugt: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ugt i32 %a, %b ret i1 %res } @@ -54,13 +55,13 @@ define i1 @icmp_uge(i32 signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_uge: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp uge i32 %a, %b ret i1 %res } @@ -69,12 +70,12 @@ define i1 @icmp_ult(i32 signext %a, i32 signext %b) { ; LA32-LABEL: icmp_ult: ; LA32: # %bb.0: ; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ult: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ult i32 %a, %b ret i1 %res } @@ -84,13 +85,13 @@ define i1 @icmp_ule(i32 
signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ule: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ule i32 %a, %b ret i1 %res } @@ -99,12 +100,12 @@ define i1 @icmp_sgt(i32 signext %a, i32 signext %b) { ; LA32-LABEL: icmp_sgt: ; LA32: # %bb.0: ; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_sgt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp sgt i32 %a, %b ret i1 %res } @@ -114,13 +115,13 @@ define i1 @icmp_sge(i32 signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: slt $a0, $a0, $a1 ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_sge: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp sge i32 %a, %b ret i1 %res } @@ -129,12 +130,12 @@ define i1 @icmp_slt(i32 signext %a, i32 signext %b) { ; LA32-LABEL: icmp_slt: ; LA32: # %bb.0: ; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_slt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp slt i32 %a, %b ret i1 %res } @@ -144,13 +145,13 @@ define i1 @icmp_sle(i32 signext %a, i32 signext %b) { ; LA32: # %bb.0: ; LA32-NEXT: slt $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_sle: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp sle i32 %a, %b ret i1 %res } @@ -159,12 +160,12 @@ define i1 @icmp_slt_3(i32 signext %a) { ; LA32-LABEL: icmp_slt_3: ; LA32: # %bb.0: ; 
LA32-NEXT: slti $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_slt_3: ; LA64: # %bb.0: ; LA64-NEXT: slti $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp slt i32 %a, 3 ret i1 %res } @@ -173,12 +174,12 @@ define i1 @icmp_ult_3(i32 signext %a) { ; LA32-LABEL: icmp_ult_3: ; LA32: # %bb.0: ; LA32-NEXT: sltui $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ult_3: ; LA64: # %bb.0: ; LA64-NEXT: sltui $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ult i32 %a, 3 ret i1 %res } @@ -187,12 +188,12 @@ define i1 @icmp_eq_0(i32 signext %a) { ; LA32-LABEL: icmp_eq_0: ; LA32: # %bb.0: ; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_eq_0: ; LA64: # %bb.0: ; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp eq i32 %a, 0 ret i1 %res } @@ -202,13 +203,13 @@ define i1 @icmp_eq_3(i32 signext %a) { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, -3 ; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_eq_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, -3 ; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp eq i32 %a, 3 ret i1 %res } @@ -217,12 +218,12 @@ define i1 @icmp_ne_0(i32 signext %a) { ; LA32-LABEL: icmp_ne_0: ; LA32: # %bb.0: ; LA32-NEXT: sltu $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ne_0: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ne i32 %a, 0 ret i1 %res } @@ -232,13 +233,13 @@ define i1 @icmp_ne_3(i32 signext %a) { ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, -3 ; LA32-NEXT: sltu $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: icmp_ne_3: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $a0, $a0, -3 ; LA64-NEXT: sltu $a0, $zero, 
$a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = icmp ne i32 %a, 3 ret i1 %res } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll index abbd700f44f7d84ecc858f6548504d61f8b5daf5..cd60183a0933becfb7dc954fdfe2e9e9bbee2988 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll @@ -1,12 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s define i32 @indirectbr(ptr %target) nounwind { ; CHECK-LABEL: indirectbr: ; CHECK: # %bb.0: -; CHECK-NEXT: jirl $zero, $a0, 0 +; CHECK-NEXT: jr $a0 ; CHECK-NEXT: .LBB0_1: # %test_label ; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret indirectbr ptr %target, [label %test_label] test_label: br label %ret @@ -20,7 +21,7 @@ define i32 @indirectbr_with_offset(ptr %a) nounwind { ; CHECK-NEXT: jirl $zero, $a0, 1380 ; CHECK-NEXT: .LBB1_1: # %test_label ; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %target = getelementptr inbounds i8, ptr %a, i32 1380 indirectbr ptr %target, [label %test_label] test_label: diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index 1f06c818acf20c5f9aee4375314ffca5549ef7cc..e91d0c145eab6e176081cd536cb65bb01720ba60 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -7,13 +7,13 @@ define i8 @load_acquire_i8(ptr %ptr) { ; LA32: # %bb.0: ; LA32-NEXT: ld.b $a0, $a0, 0 ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i8: ; LA64: # %bb.0: ; LA64-NEXT: ld.b $a0, $a0, 0 ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load atomic i8, 
ptr %ptr acquire, align 1 ret i8 %val } @@ -23,13 +23,13 @@ define i16 @load_acquire_i16(ptr %ptr) { ; LA32: # %bb.0: ; LA32-NEXT: ld.h $a0, $a0, 0 ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i16: ; LA64: # %bb.0: ; LA64-NEXT: ld.h $a0, $a0, 0 ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load atomic i16, ptr %ptr acquire, align 2 ret i16 %val } @@ -39,13 +39,13 @@ define i32 @load_acquire_i32(ptr %ptr) { ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a0, $a0, 0 ; LA32-NEXT: dbar 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i32: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a0, $a0, 0 ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load atomic i32, ptr %ptr acquire, align 4 ret i32 %val } @@ -58,16 +58,16 @@ define i64 @load_acquire_i64(ptr %ptr) { ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: ori $a1, $zero, 2 -; LA32-NEXT: bl __atomic_load_8 +; LA32-NEXT: bl %plt(__atomic_load_8) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i64: ; LA64: # %bb.0: ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: dbar 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %val = load atomic i64, ptr %ptr acquire, align 8 ret i64 %val } @@ -76,14 +76,14 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: st.b $a0, $a1, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i8: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: st.b $a0, $a1, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: ret store atomic i8 %v, ptr %ptr release, align 1 ret void } @@ -92,14 +92,14 @@ define void @store_release_i16(ptr %ptr, i16 
signext %v) { ; LA32-LABEL: store_release_i16: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: st.h $a0, $a1, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: st.h $a1, $a0, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i16: ; LA64: # %bb.0: ; LA64-NEXT: dbar 0 -; LA64-NEXT: st.h $a0, $a1, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: st.h $a1, $a0, 0 +; LA64-NEXT: ret store atomic i16 %v, ptr %ptr release, align 2 ret void } @@ -108,14 +108,13 @@ define void @store_release_i32(ptr %ptr, i32 signext %v) { ; LA32-LABEL: store_release_i32: ; LA32: # %bb.0: ; LA32-NEXT: dbar 0 -; LA32-NEXT: st.w $a0, $a1, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i32: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 -; LA64-NEXT: st.w $a0, $a1, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret store atomic i32 %v, ptr %ptr release, align 4 ret void } @@ -128,16 +127,214 @@ define void @store_release_i64(ptr %ptr, i64 %v) { ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: ori $a3, $zero, 3 -; LA32-NEXT: bl __atomic_store_8 +; LA32-NEXT: bl %plt(__atomic_store_8) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i64: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 -; LA64-NEXT: st.d $a0, $a1, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret store atomic i64 %v, ptr %ptr release, align 8 ret void } + +define void @store_unordered_i8(ptr %ptr, i8 signext %v) { +; LA32-LABEL: store_unordered_i8: +; LA32: # %bb.0: +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_i8: +; LA64: # %bb.0: +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr unordered, align 1 + ret void +} + +define void @store_unordered_i16(ptr %ptr, i16 
signext %v) { +; LA32-LABEL: store_unordered_i16: +; LA32: # %bb.0: +; LA32-NEXT: st.h $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_i16: +; LA64: # %bb.0: +; LA64-NEXT: st.h $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr unordered, align 2 + ret void +} + +define void @store_unordered_i32(ptr %ptr, i32 signext %v) { +; LA32-LABEL: store_unordered_i32: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_i32: +; LA64: # %bb.0: +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i32 %v, ptr %ptr unordered, align 4 + ret void +} + +define void @store_unordered_i64(ptr %ptr, i64 %v) { +; LA32-LABEL: store_unordered_i64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_i64: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i64 %v, ptr %ptr unordered, align 8 + ret void +} + +define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { +; LA32-LABEL: store_monotonic_i8: +; LA32: # %bb.0: +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_i8: +; LA64: # %bb.0: +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr monotonic, align 1 + ret void +} + +define void @store_monotonic_i16(ptr %ptr, i16 signext %v) { +; LA32-LABEL: store_monotonic_i16: +; LA32: # %bb.0: +; LA32-NEXT: st.h $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_i16: +; LA64: # %bb.0: +; LA64-NEXT: st.h $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr monotonic, align 2 + ret void +} + +define void @store_monotonic_i32(ptr %ptr, i32 signext %v) { +; LA32-LABEL: 
store_monotonic_i32: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_i32: +; LA64: # %bb.0: +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i32 %v, ptr %ptr monotonic, align 4 + ret void +} + +define void @store_monotonic_i64(ptr %ptr, i64 %v) { +; LA32-LABEL: store_monotonic_i64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_i64: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic i64 %v, ptr %ptr monotonic, align 8 + ret void +} + +define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { +; LA32-LABEL: store_seq_cst_i8: +; LA32: # %bb.0: +; LA32-NEXT: dbar 0 +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_i8: +; LA64: # %bb.0: +; LA64-NEXT: dbar 0 +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr seq_cst, align 1 + ret void +} + +define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { +; LA32-LABEL: store_seq_cst_i16: +; LA32: # %bb.0: +; LA32-NEXT: dbar 0 +; LA32-NEXT: st.h $a1, $a0, 0 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_i16: +; LA64: # %bb.0: +; LA64-NEXT: dbar 0 +; LA64-NEXT: st.h $a1, $a0, 0 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr seq_cst, align 2 + ret void +} + +define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { +; LA32-LABEL: store_seq_cst_i32: +; LA32: # %bb.0: +; LA32-NEXT: dbar 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_i32: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 
+; LA64-NEXT: ret + store atomic i32 %v, ptr %ptr seq_cst, align 4 + ret void +} + +define void @store_seq_cst_i64(ptr %ptr, i64 %v) { +; LA32-LABEL: store_seq_cst_i64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a3, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_i64: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic i64 %v, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-fp.ll new file mode 100644 index 0000000000000000000000000000000000000000..3f392c5e3f9c6d9c7d5e7f2916de9e8d4fc0d825 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-fp.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +define float @fldx_s(ptr %a, i64 %idx) nounwind { +; LA32F-LABEL: fldx_s: +; LA32F: # %bb.0: +; LA32F-NEXT: slli.w $a1, $a1, 2 +; LA32F-NEXT: fldx.s $fa0, $a0, $a1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fldx_s: +; LA32D: # %bb.0: +; LA32D-NEXT: slli.w $a1, $a1, 2 +; LA32D-NEXT: fldx.s $fa0, $a0, $a1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fldx_s: +; LA64F: # %bb.0: +; LA64F-NEXT: slli.d $a1, $a1, 2 +; LA64F-NEXT: fldx.s $fa0, $a0, $a1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fldx_s: +; LA64D: # %bb.0: +; LA64D-NEXT: slli.d 
$a1, $a1, 2 +; LA64D-NEXT: fldx.s $fa0, $a0, $a1 +; LA64D-NEXT: ret + %1 = getelementptr float, ptr %a, i64 %idx + %2 = load float, ptr %1 + ret float %2 +} + +define double @fldx_d(ptr %a, i64 %idx) nounwind { +; LA32F-LABEL: fldx_d: +; LA32F: # %bb.0: +; LA32F-NEXT: alsl.w $a1, $a1, $a0, 3 +; LA32F-NEXT: ld.w $a0, $a1, 0 +; LA32F-NEXT: ld.w $a1, $a1, 4 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fldx_d: +; LA32D: # %bb.0: +; LA32D-NEXT: slli.w $a1, $a1, 3 +; LA32D-NEXT: fldx.d $fa0, $a0, $a1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fldx_d: +; LA64F: # %bb.0: +; LA64F-NEXT: slli.d $a1, $a1, 3 +; LA64F-NEXT: ldx.d $a0, $a0, $a1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fldx_d: +; LA64D: # %bb.0: +; LA64D-NEXT: slli.d $a1, $a1, 3 +; LA64D-NEXT: fldx.d $fa0, $a0, $a1 +; LA64D-NEXT: ret + %1 = getelementptr double, ptr %a, i64 %idx + %2 = load double, ptr %1 + ret double %2 +} + +define void @fstx_s(ptr %dst, i64 %idx, float %val) nounwind { +; LA32F-LABEL: fstx_s: +; LA32F: # %bb.0: +; LA32F-NEXT: slli.w $a1, $a1, 2 +; LA32F-NEXT: fstx.s $fa0, $a0, $a1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fstx_s: +; LA32D: # %bb.0: +; LA32D-NEXT: slli.w $a1, $a1, 2 +; LA32D-NEXT: fstx.s $fa0, $a0, $a1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fstx_s: +; LA64F: # %bb.0: +; LA64F-NEXT: slli.d $a1, $a1, 2 +; LA64F-NEXT: fstx.s $fa0, $a0, $a1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fstx_s: +; LA64D: # %bb.0: +; LA64D-NEXT: slli.d $a1, $a1, 2 +; LA64D-NEXT: fstx.s $fa0, $a0, $a1 +; LA64D-NEXT: ret + %1 = getelementptr float, ptr %dst, i64 %idx + store float %val, ptr %1 + ret void +} + +define void @fstx_d(ptr %dst, i64 %idx, double %val) nounwind { +; LA32F-LABEL: fstx_d: +; LA32F: # %bb.0: +; LA32F-NEXT: alsl.w $a0, $a1, $a0, 3 +; LA32F-NEXT: st.w $a4, $a0, 4 +; LA32F-NEXT: st.w $a3, $a0, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fstx_d: +; LA32D: # %bb.0: +; LA32D-NEXT: slli.w $a1, $a1, 3 +; LA32D-NEXT: fstx.d $fa0, $a0, $a1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fstx_d: +; LA64F: # %bb.0: +; LA64F-NEXT: 
slli.d $a1, $a1, 3 +; LA64F-NEXT: stx.d $a2, $a0, $a1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fstx_d: +; LA64D: # %bb.0: +; LA64D-NEXT: slli.d $a1, $a1, 3 +; LA64D-NEXT: fstx.d $fa0, $a0, $a1 +; LA64D-NEXT: ret + %1 = getelementptr double, ptr %dst, i64 %idx + store double %val, ptr %1 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll index 8894e3cac3fb3600d721cc74e17946412a4f3ebd..387a62bd6c00ff60a7e9fb1e2289df5a058bf3ef 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll @@ -11,23 +11,23 @@ define i32 @load_store_global() nounwind { ; ALL-LABEL: load_store_global: ; ALL: # %bb.0: -; LA32NOPIC-NEXT: pcalau12i $a0, G -; LA32NOPIC-NEXT: addi.w $a1, $a0, G -; LA32PIC-NEXT: pcalau12i $a0, .LG$local -; LA32PIC-NEXT: addi.w $a1, $a0, .LG$local +; LA32NOPIC-NEXT: pcalau12i $a0, %pc_hi20(G) +; LA32NOPIC-NEXT: addi.w $a1, $a0, %pc_lo12(G) +; LA32PIC-NEXT: pcalau12i $a0, %pc_hi20(.LG$local) +; LA32PIC-NEXT: addi.w $a1, $a0, %pc_lo12(.LG$local) ; LA32-NEXT: ld.w $a0, $a1, 0 ; LA32-NEXT: addi.w $a0, $a0, 1 ; LA32-NEXT: st.w $a0, $a1, 0 -; LA64NOPIC-NEXT: pcalau12i $a0, G -; LA64NOPIC-NEXT: addi.d $a1, $a0, G -; LA64PIC-NEXT: pcalau12i $a0, .LG$local -; LA64PIC-NEXT: addi.d $a1, $a0, .LG$local +; LA64NOPIC-NEXT: pcalau12i $a0, %pc_hi20(G) +; LA64NOPIC-NEXT: addi.d $a1, $a0, %pc_lo12(G) +; LA64PIC-NEXT: pcalau12i $a0, %pc_hi20(.LG$local) +; LA64PIC-NEXT: addi.d $a1, $a0, %pc_lo12(.LG$local) ; LA64-NEXT: ld.w $a0, $a1, 0 ; LA64-NEXT: addi.d $a0, $a0, 1 ; LA64-NEXT: st.w $a0, $a1, 0 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret %v = load i32, ptr @G %sum = add i32 %v, 1 @@ -39,30 +39,30 @@ define i32 @load_store_global_array(i32 %a) nounwind { ; ALL-LABEL: load_store_global_array: ; ALL: # %bb.0: -; LA32NOPIC-NEXT: pcalau12i $a1, arr -; LA32NOPIC-NEXT: addi.w $a2, $a1, arr -; LA32PIC-NEXT: pcalau12i $a1, 
.Larr$local -; LA32PIC-NEXT: addi.w $a2, $a1, .Larr$local +; LA32NOPIC-NEXT: pcalau12i $a1, %pc_hi20(arr) +; LA32NOPIC-NEXT: addi.w $a2, $a1, %pc_lo12(arr) +; LA32PIC-NEXT: pcalau12i $a1, %pc_hi20(.Larr$local) +; LA32PIC-NEXT: addi.w $a2, $a1, %pc_lo12(.Larr$local) ; LA32-NEXT: ld.w $a1, $a2, 0 ; LA32-NEXT: st.w $a0, $a2, 0 -; LA32NOPIC-NEXT: ld.w $a3, $a2, 0 -; LA32NOPIC-NEXT: st.w $a0, $a2, 0 +; LA32NOPIC-NEXT: ld.w $a3, $a2, 36 +; LA32NOPIC-NEXT: st.w $a0, $a2, 36 ; LA32PIC-NEXT: ld.w $a3, $a2, 36 ; LA32PIC-NEXT: st.w $a0, $a2, 36 -; LA64NOPIC-NEXT: pcalau12i $a1, arr -; LA64NOPIC-NEXT: addi.d $a2, $a1, arr -; LA64PIC-NEXT: pcalau12i $a1, .Larr$local -; LA64PIC-NEXT: addi.d $a2, $a1, .Larr$local +; LA64NOPIC-NEXT: pcalau12i $a1, %pc_hi20(arr) +; LA64NOPIC-NEXT: addi.d $a2, $a1, %pc_lo12(arr) +; LA64PIC-NEXT: pcalau12i $a1, %pc_hi20(.Larr$local) +; LA64PIC-NEXT: addi.d $a2, $a1, %pc_lo12(.Larr$local) ; LA64-NEXT: ld.w $a1, $a2, 0 ; LA64-NEXT: st.w $a0, $a2, 0 -; LA64NOPIC-NEXT: ld.w $a3, $a2, 0 -; LA64NOPIC-NEXT: st.w $a0, $a2, 0 +; LA64NOPIC-NEXT: ld.w $a3, $a2, 36 +; LA64NOPIC-NEXT: st.w $a0, $a2, 36 ; LA64PIC-NEXT: ld.w $a3, $a2, 36 ; LA64PIC-NEXT: st.w $a0, $a2, 36 ; ALL-NEXT: move $a0, $a1 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret %1 = load volatile i32, ptr @arr, align 4 store i32 %a, ptr @arr, align 4 @@ -80,13 +80,13 @@ define i64 @ld_b(ptr %a) nounwind { ; LA32-NEXT: ld.b $a1, $a0, 0 ; LA32-NEXT: ld.b $a0, $a0, 1 ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_b: ; LA64: # %bb.0: ; LA64-NEXT: ld.b $a1, $a0, 0 ; LA64-NEXT: ld.b $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i8, ptr %a, i64 1 %2 = load i8, ptr %1 %3 = sext i8 %2 to i64 @@ -100,13 +100,13 @@ define i64 @ld_h(ptr %a) nounwind { ; LA32-NEXT: ld.h $a1, $a0, 0 ; LA32-NEXT: ld.h $a0, $a0, 4 ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_h: ; 
LA64: # %bb.0: ; LA64-NEXT: ld.h $a1, $a0, 0 ; LA64-NEXT: ld.h $a0, $a0, 4 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i16, ptr %a, i64 2 %2 = load i16, ptr %1 %3 = sext i16 %2 to i64 @@ -120,13 +120,13 @@ define i64 @ld_w(ptr %a) nounwind { ; LA32-NEXT: ld.w $a1, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 12 ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_w: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a1, $a0, 0 ; LA64-NEXT: ld.w $a0, $a0, 12 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i32, ptr %a, i64 3 %2 = load i32, ptr %1 %3 = sext i32 %2 to i64 @@ -141,13 +141,13 @@ define i64 @ld_d(ptr %a) nounwind { ; LA32-NEXT: ld.w $a1, $a0, 0 ; LA32-NEXT: ld.w $a1, $a0, 28 ; LA32-NEXT: ld.w $a0, $a0, 24 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_d: ; LA64: # %bb.0: ; LA64-NEXT: ld.d $a1, $a0, 0 ; LA64-NEXT: ld.d $a0, $a0, 24 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i64, ptr %a, i64 3 %2 = load i64, ptr %1 %3 = load volatile i64, ptr %a @@ -161,14 +161,14 @@ define i64 @ld_bu(ptr %a) nounwind { ; LA32-NEXT: ld.bu $a2, $a0, 4 ; LA32-NEXT: add.w $a0, $a2, $a1 ; LA32-NEXT: sltu $a1, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_bu: ; LA64: # %bb.0: ; LA64-NEXT: ld.bu $a1, $a0, 0 ; LA64-NEXT: ld.bu $a0, $a0, 4 ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i8, ptr %a, i64 4 %2 = load i8, ptr %1 %3 = zext i8 %2 to i64 @@ -185,14 +185,14 @@ define i64 @ld_hu(ptr %a) nounwind { ; LA32-NEXT: ld.hu $a2, $a0, 10 ; LA32-NEXT: add.w $a0, $a2, $a1 ; LA32-NEXT: sltu $a1, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_hu: ; LA64: # %bb.0: ; LA64-NEXT: ld.hu $a1, $a0, 0 ; LA64-NEXT: ld.hu $a0, $a0, 10 ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i16, ptr %a, i64 5 %2 
= load i16, ptr %1 %3 = zext i16 %2 to i64 @@ -209,14 +209,14 @@ define i64 @ld_wu(ptr %a) nounwind { ; LA32-NEXT: ld.w $a2, $a0, 20 ; LA32-NEXT: add.w $a0, $a2, $a1 ; LA32-NEXT: sltu $a1, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_wu: ; LA64: # %bb.0: ; LA64-NEXT: ld.wu $a1, $a0, 0 ; LA64-NEXT: ld.wu $a0, $a0, 20 ; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i32, ptr %a, i64 5 %2 = load i32, ptr %1 %3 = zext i32 %2 to i64 @@ -226,6 +226,178 @@ define i64 @ld_wu(ptr %a) nounwind { ret i64 %6 } +define i64 @ldx_b(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_b: +; LA32: # %bb.0: +; LA32-NEXT: add.w $a1, $a0, $a1 +; LA32-NEXT: ld.b $a2, $a1, 0 +; LA32-NEXT: ld.b $a0, $a0, 0 +; LA32-NEXT: srai.w $a1, $a2, 31 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_b: +; LA64: # %bb.0: +; LA64-NEXT: ldx.b $a1, $a0, $a1 +; LA64-NEXT: ld.b $a0, $a0, 0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i8, ptr %a, i64 %idx + %2 = load i8, ptr %1 + %3 = sext i8 %2 to i64 + %4 = load volatile i8, ptr %a + ret i64 %3 +} + +define i64 @ldx_h(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_h: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a1, $a1, $a0, 1 +; LA32-NEXT: ld.h $a2, $a1, 0 +; LA32-NEXT: ld.h $a0, $a0, 0 +; LA32-NEXT: srai.w $a1, $a2, 31 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_h: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 1 +; LA64-NEXT: ldx.h $a1, $a0, $a1 +; LA64-NEXT: ld.h $a0, $a0, 0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i16, ptr %a, i64 %idx + %2 = load i16, ptr %1 + %3 = sext i16 %2 to i64 + %4 = load volatile i16, ptr %a + ret i64 %3 +} + +define i64 @ldx_w(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_w: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a1, $a1, $a0, 2 +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: srai.w $a1, $a2, 31 +; LA32-NEXT: move $a0, $a2 
+; LA32-NEXT: ret +; +; LA64-LABEL: ldx_w: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 2 +; LA64-NEXT: ldx.w $a1, $a0, $a1 +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i32, ptr %a, i64 %idx + %2 = load i32, ptr %1 + %3 = sext i32 %2 to i64 + %4 = load volatile i32, ptr %a + ret i64 %3 +} + +define i64 @ldx_d(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_d: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a1, $a1, $a0, 3 +; LA32-NEXT: ld.w $a2, $a1, 0 +; LA32-NEXT: ld.w $a3, $a0, 0 +; LA32-NEXT: ld.w $a1, $a1, 4 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_d: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 3 +; LA64-NEXT: ldx.d $a1, $a0, $a1 +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i64, ptr %a, i64 %idx + %2 = load i64, ptr %1 + %3 = load volatile i64, ptr %a + ret i64 %2 +} + +define i64 @ldx_bu(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_bu: +; LA32: # %bb.0: +; LA32-NEXT: add.w $a1, $a0, $a1 +; LA32-NEXT: ld.bu $a1, $a1, 0 +; LA32-NEXT: ld.bu $a0, $a0, 0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: sltu $a1, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_bu: +; LA64: # %bb.0: +; LA64-NEXT: ldx.bu $a1, $a0, $a1 +; LA64-NEXT: ld.bu $a0, $a0, 0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = getelementptr i8, ptr %a, i64 %idx + %2 = load i8, ptr %1 + %3 = zext i8 %2 to i64 + %4 = load volatile i8, ptr %a + %5 = zext i8 %4 to i64 + %6 = add i64 %3, %5 + ret i64 %6 +} + +define i64 @ldx_hu(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_hu: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a1, $a1, $a0, 1 +; LA32-NEXT: ld.hu $a1, $a1, 0 +; LA32-NEXT: ld.hu $a0, $a0, 0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: sltu $a1, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_hu: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 1 +; LA64-NEXT: ldx.hu $a1, $a0, $a1 +; LA64-NEXT: ld.hu $a0, $a0, 0 +; 
LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = getelementptr i16, ptr %a, i64 %idx + %2 = load i16, ptr %1 + %3 = zext i16 %2 to i64 + %4 = load volatile i16, ptr %a + %5 = zext i16 %4 to i64 + %6 = add i64 %3, %5 + ret i64 %6 +} + +define i64 @ldx_wu(ptr %a, i64 %idx) nounwind { +; LA32-LABEL: ldx_wu: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a1, $a1, $a0, 2 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: sltu $a1, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: ldx_wu: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 2 +; LA64-NEXT: ldx.wu $a1, $a0, $a1 +; LA64-NEXT: ld.wu $a0, $a0, 0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: ret + %1 = getelementptr i32, ptr %a, i64 %idx + %2 = load i32, ptr %1 + %3 = zext i32 %2 to i64 + %4 = load volatile i32, ptr %a + %5 = zext i32 %4 to i64 + %6 = add i64 %3, %5 + ret i64 %6 +} + ;; Check indexed and unindexed stores. define void @st_b(ptr %a, i8 %b) nounwind { @@ -233,7 +405,7 @@ define void @st_b(ptr %a, i8 %b) nounwind { ; ALL: # %bb.0: ; ALL-NEXT: st.b $a1, $a0, 6 ; ALL-NEXT: st.b $a1, $a0, 0 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret store i8 %b, ptr %a %1 = getelementptr i8, ptr %a, i64 6 store i8 %b, ptr %1 @@ -245,7 +417,7 @@ define void @st_h(ptr %a, i16 %b) nounwind { ; ALL: # %bb.0: ; ALL-NEXT: st.h $a1, $a0, 14 ; ALL-NEXT: st.h $a1, $a0, 0 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret store i16 %b, ptr %a %1 = getelementptr i16, ptr %a, i64 7 store i16 %b, ptr %1 @@ -257,7 +429,7 @@ define void @st_w(ptr %a, i32 %b) nounwind { ; ALL: # %bb.0: ; ALL-NEXT: st.w $a1, $a0, 28 ; ALL-NEXT: st.w $a1, $a0, 0 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret store i32 %b, ptr %a %1 = getelementptr i32, ptr %a, i64 7 store i32 %b, ptr %1 @@ -271,19 +443,87 @@ define void @st_d(ptr %a, i64 %b) nounwind { ; LA32-NEXT: st.w $a2, $a0, 4 ; LA32-NEXT: st.w $a1, $a0, 64 ; LA32-NEXT: st.w $a1, $a0, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; 
; LA64-LABEL: st_d: ; LA64: # %bb.0: ; LA64-NEXT: st.d $a1, $a0, 64 ; LA64-NEXT: st.d $a1, $a0, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret store i64 %b, ptr %a %1 = getelementptr i64, ptr %a, i64 8 store i64 %b, ptr %1 ret void } +define void @stx_b(ptr %dst, i64 %idx, i8 %val) nounwind { +; LA32-LABEL: stx_b: +; LA32: # %bb.0: +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: st.b $a3, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stx_b: +; LA64: # %bb.0: +; LA64-NEXT: stx.b $a2, $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i8, ptr %dst, i64 %idx + store i8 %val, ptr %1 + ret void +} + +define void @stx_h(ptr %dst, i64 %idx, i16 %val) nounwind { +; LA32-LABEL: stx_h: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a0, $a1, $a0, 1 +; LA32-NEXT: st.h $a3, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stx_h: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 1 +; LA64-NEXT: stx.h $a2, $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i16, ptr %dst, i64 %idx + store i16 %val, ptr %1 + ret void +} + +define void @stx_w(ptr %dst, i64 %idx, i32 %val) nounwind { +; LA32-LABEL: stx_w: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a0, $a1, $a0, 2 +; LA32-NEXT: st.w $a3, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stx_w: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 2 +; LA64-NEXT: stx.w $a2, $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i32, ptr %dst, i64 %idx + store i32 %val, ptr %1 + ret void +} + +define void @stx_d(ptr %dst, i64 %idx, i64 %val) nounwind { +; LA32-LABEL: stx_d: +; LA32: # %bb.0: +; LA32-NEXT: alsl.w $a0, $a1, $a0, 3 +; LA32-NEXT: st.w $a4, $a0, 4 +; LA32-NEXT: st.w $a3, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stx_d: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a1, 3 +; LA64-NEXT: stx.d $a2, $a0, $a1 +; LA64-NEXT: ret + %1 = getelementptr i64, ptr %dst, i64 %idx + store i64 %val, ptr %1 + ret void +} + ;; Check load from and store to an i1 location. 
define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { ;; sextload i1 @@ -295,7 +535,7 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; LA32-NEXT: sub.w $a0, $a2, $a1 ; LA32-NEXT: sltu $a1, $a2, $a1 ; LA32-NEXT: sub.w $a1, $zero, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_sext_zext_anyext_i1: ; LA64: # %bb.0: @@ -303,7 +543,7 @@ define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; LA64-NEXT: ld.bu $a1, $a0, 1 ; LA64-NEXT: ld.bu $a0, $a0, 2 ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i1, ptr %a, i64 1 %2 = load i1, ptr %1 %3 = sext i1 %2 to i64 @@ -325,7 +565,7 @@ define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; LA32-NEXT: ld.bu $a1, $a0, 1 ; LA32-NEXT: ld.bu $a0, $a0, 2 ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: load_sext_zext_anyext_i1_i16: ; LA64: # %bb.0: @@ -333,7 +573,7 @@ define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; LA64-NEXT: ld.bu $a1, $a0, 1 ; LA64-NEXT: ld.bu $a0, $a0, 2 ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = getelementptr i1, ptr %a, i64 1 %2 = load i1, ptr %1 %3 = sext i1 %2 to i16 @@ -359,7 +599,7 @@ define i64 @ld_sd_constant(i64 %a) nounwind { ; LA32-NEXT: st.w $a1, $a0, 0 ; LA32-NEXT: move $a0, $a2 ; LA32-NEXT: move $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: ld_sd_constant: ; LA64: # %bb.0: @@ -370,7 +610,7 @@ define i64 @ld_sd_constant(i64 %a) nounwind { ; LA64-NEXT: ld.d $a1, $a2, 0 ; LA64-NEXT: st.d $a0, $a2, 0 ; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = inttoptr i64 16045690984833335023 to ptr %2 = load volatile i64, ptr %1 store i64 %a, ptr %1 @@ -384,7 +624,7 @@ define float @load_store_float(ptr %a, float %b) nounwind { ; ALL-NEXT: fld.s $fa1, $a0, 4 ; ALL-NEXT: fst.s $fa0, $a0, 4 ; ALL-NEXT: fmov.s $fa0, $fa1 -; ALL-NEXT: jirl 
$zero, $ra, 0 +; ALL-NEXT: ret %1 = getelementptr float, ptr %a, i64 1 %2 = load float, ptr %1 store float %b, ptr %1 @@ -398,7 +638,7 @@ define double @load_store_double(ptr %a, double %b) nounwind { ; ALL-NEXT: fld.d $fa1, $a0, 8 ; ALL-NEXT: fst.d $fa0, $a0, 8 ; ALL-NEXT: fmov.d $fa0, $fa1 -; ALL-NEXT: jirl $zero, $ra, 0 +; ALL-NEXT: ret %1 = getelementptr double, ptr %a, i64 1 %2 = load double, ptr %1 store double %b, ptr %1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll index 2f63c64de8184817354fb0a52fbb1a15f7c4717b..3916298e298f45ef87c3a91e55fec7c3a0cf9d1b 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -6,11 +7,11 @@ define i1 @lshr_i1(i1 %x, i1 %y) { ; LA32-LABEL: lshr_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i1 %x, %y ret i1 %lshr } @@ -20,13 +21,13 @@ define i8 @lshr_i8(i8 %x, i8 %y) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i8: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: srl.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i8 %x, %y ret i8 %lshr } @@ -36,13 +37,13 @@ define i16 @lshr_i16(i16 %x, i16 %y) { ; LA32: # %bb.0: ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i16: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: srl.d $a0, $a0, $a1 -; LA64-NEXT: jirl 
$zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i16 %x, %y ret i16 %lshr } @@ -51,12 +52,12 @@ define i32 @lshr_i32(i32 %x, i32 %y) { ; LA32-LABEL: lshr_i32: ; LA32: # %bb.0: ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i32: ; LA64: # %bb.0: ; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i32 %x, %y ret i32 %lshr } @@ -78,12 +79,12 @@ define i64 @lshr_i64(i64 %x, i64 %y) { ; LA32-NEXT: srl.w $a1, $a1, $a2 ; LA32-NEXT: srai.w $a2, $a3, 31 ; LA32-NEXT: and $a1, $a2, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i64: ; LA64: # %bb.0: ; LA64-NEXT: srl.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i64 %x, %y ret i64 %lshr } @@ -91,11 +92,11 @@ define i64 @lshr_i64(i64 %x, i64 %y) { define i1 @lshr_i1_3(i1 %x) { ; LA32-LABEL: lshr_i1_3: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i1_3: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i1 %x, 3 ret i1 %lshr } @@ -104,12 +105,12 @@ define i8 @lshr_i8_3(i8 %x) { ; LA32-LABEL: lshr_i8_3: ; LA32: # %bb.0: ; LA32-NEXT: bstrpick.w $a0, $a0, 7, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i8_3: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 7, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i8 %x, 3 ret i8 %lshr } @@ -118,12 +119,12 @@ define i16 @lshr_i16_3(i16 %x) { ; LA32-LABEL: lshr_i16_3: ; LA32: # %bb.0: ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i16_3: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i16 %x, 3 ret i16 %lshr } @@ -132,12 +133,12 @@ define i32 @lshr_i32_3(i32 %x) { ; LA32-LABEL: lshr_i32_3: ; LA32: # %bb.0: ; LA32-NEXT: srli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; 
LA32-NEXT: ret ; ; LA64-LABEL: lshr_i32_3: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i32 %x, 3 ret i32 %lshr } @@ -149,12 +150,12 @@ define i64 @lshr_i64_3(i64 %x) { ; LA32-NEXT: slli.w $a2, $a1, 29 ; LA32-NEXT: or $a0, $a0, $a2 ; LA32-NEXT: srli.w $a1, $a1, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: lshr_i64_3: ; LA64: # %bb.0: ; LA64-NEXT: srli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %lshr = lshr i64 %x, 3 ret i64 %lshr } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll index 0d31e790cf729c2e93d43992e36dff36a2585124..883f548c795860a0ea043193d52b2db4a89397f5 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll @@ -8,12 +8,12 @@ define i1 @mul_i1(i1 %a, i1 %b) { ; LA32-LABEL: mul_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mul.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = mul i1 %a, %b ret i1 %r @@ -23,12 +23,12 @@ define i8 @mul_i8(i8 %a, i8 %b) { ; LA32-LABEL: mul_i8: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mul.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = mul i8 %a, %b ret i8 %r @@ -38,12 +38,12 @@ define i16 @mul_i16(i16 %a, i16 %b) { ; LA32-LABEL: mul_i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mul.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = mul i16 %a, %b ret i16 %r @@ -53,12 +53,12 @@ define i32 @mul_i32(i32 
%a, i32 %b) { ; LA32-LABEL: mul_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mul.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = mul i32 %a, %b ret i32 %r @@ -73,12 +73,12 @@ define i64 @mul_i64(i64 %a, i64 %b) { ; LA32-NEXT: mul.w $a1, $a1, $a2 ; LA32-NEXT: add.w $a1, $a3, $a1 ; LA32-NEXT: mul.w $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = mul i64 %a, %b ret i64 %r @@ -91,12 +91,12 @@ define i64 @mul_pow2(i64 %a) { ; LA32-NEXT: srli.w $a2, $a0, 29 ; LA32-NEXT: or $a1, $a1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_pow2: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = mul i64 %a, 8 ret i64 %1 } @@ -105,17 +105,16 @@ define i64 @mul_p5(i64 %a) { ; LA32-LABEL: mul_p5: ; LA32: # %bb.0: ; LA32-NEXT: ori $a2, $zero, 5 -; LA32-NEXT: mul.w $a1, $a1, $a2 -; LA32-NEXT: mulh.wu $a3, $a0, $a2 -; LA32-NEXT: add.w $a1, $a3, $a1 -; LA32-NEXT: mul.w $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: mulh.wu $a2, $a0, $a2 +; LA32-NEXT: alsl.w $a1, $a1, $a1, 2 +; LA32-NEXT: add.w $a1, $a2, $a1 +; LA32-NEXT: alsl.w $a0, $a0, $a0, 2 +; LA32-NEXT: ret ; ; LA64-LABEL: mul_p5: ; LA64: # %bb.0: -; LA64-NEXT: ori $a1, $zero, 5 -; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: alsl.d $a0, $a0, $a0, 2 +; LA64-NEXT: ret %1 = mul i64 %a, 5 ret i64 %1 } @@ -124,13 +123,13 @@ define i32 @mulh_w(i32 %a, i32 %b) { ; LA32-LABEL: mulh_w: ; LA32: # %bb.0: ; LA32-NEXT: mulh.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mulh_w: ; LA64: # %bb.0: ; LA64-NEXT: mulw.d.w $a0, $a0, $a1 
; LA64-NEXT: srli.d $a0, $a0, 32 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i32 %a to i64 %2 = sext i32 %b to i64 %3 = mul i64 %1, %2 @@ -143,13 +142,13 @@ define i32 @mulh_wu(i32 %a, i32 %b) { ; LA32-LABEL: mulh_wu: ; LA32: # %bb.0: ; LA32-NEXT: mulh.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mulh_wu: ; LA64: # %bb.0: ; LA64-NEXT: mulw.d.wu $a0, $a0, $a1 ; LA64-NEXT: srli.d $a0, $a0, 32 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i32 %a to i64 %2 = zext i32 %b to i64 %3 = mul i64 %1, %2 @@ -200,12 +199,12 @@ define i64 @mulh_d(i64 %a, i64 %b) { ; LA32-NEXT: add.w $a0, $a4, $a2 ; LA32-NEXT: sltu $a2, $a0, $a4 ; LA32-NEXT: add.w $a1, $a1, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mulh_d: ; LA64: # %bb.0: ; LA64-NEXT: mulh.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i64 %a to i128 %2 = sext i64 %b to i128 %3 = mul i128 %1, %2 @@ -236,12 +235,12 @@ define i64 @mulh_du(i64 %a, i64 %b) { ; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltu $a2, $a0, $a4 ; LA32-NEXT: add.w $a1, $a1, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mulh_du: ; LA64: # %bb.0: ; LA64-NEXT: mulh.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i64 %a to i128 %2 = zext i64 %b to i128 %3 = mul i128 %1, %2 @@ -256,12 +255,12 @@ define i64 @mulw_d_w(i32 %a, i32 %b) { ; LA32-NEXT: mul.w $a2, $a0, $a1 ; LA32-NEXT: mulh.w $a1, $a0, $a1 ; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: mulw_d_w: ; LA64: # %bb.0: ; LA64-NEXT: mulw.d.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i32 %a to i64 %2 = sext i32 %b to i64 %3 = mul i64 %1, %2 @@ -274,12 +273,12 @@ define i64 @mulw_d_wu(i32 %a, i32 %b) { ; LA32-NEXT: mul.w $a2, $a0, $a1 ; LA32-NEXT: mulh.wu $a1, $a0, $a1 ; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; 
LA64-LABEL: mulw_d_wu: ; LA64: # %bb.0: ; LA64-NEXT: mulw.d.wu $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i32 %a to i64 %2 = zext i32 %b to i64 %3 = mul i64 %1, %2 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll index 37006573244b51a4cc85030ee219b7b6c60ac851..ead72507d751a6114ac869eb52157704f51bbba4 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i1 @or_i1(i1 %a, i1 %b) { ; LA32-LABEL: or_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i1 %a, %b ret i1 %r @@ -22,12 +23,12 @@ define i8 @or_i8(i8 %a, i8 %b) { ; LA32-LABEL: or_i8: ; LA32: # %bb.0: # %entry ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i8 %a, %b ret i8 %r @@ -37,12 +38,12 @@ define i16 @or_i16(i16 %a, i16 %b) { ; LA32-LABEL: or_i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i16 %a, %b ret i16 %r @@ -52,12 +53,12 @@ define i32 @or_i32(i32 %a, i32 %b) { ; LA32-LABEL: or_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; 
LA64-LABEL: or_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i32 %a, %b ret i32 %r @@ -68,12 +69,12 @@ define i64 @or_i64(i64 %a, i64 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: or $a0, $a0, $a2 ; LA32-NEXT: or $a1, $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i64 %a, %b ret i64 %r @@ -82,11 +83,11 @@ entry: define i1 @or_i1_0(i1 %b) { ; LA32-LABEL: or_i1_0: ; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i1_0: ; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i1 4, %b ret i1 %r @@ -96,12 +97,12 @@ define i1 @or_i1_5(i1 %b) { ; LA32-LABEL: or_i1_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i1_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i1 5, %b ret i1 %r @@ -111,12 +112,12 @@ define i8 @or_i8_5(i8 %b) { ; LA32-LABEL: or_i8_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i8_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i8 5, %b ret i8 %r @@ -126,12 +127,12 @@ define i8 @or_i8_257(i8 %b) { ; LA32-LABEL: or_i8_257: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i8_257: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i8 257, %b ret i8 %r @@ -141,12 +142,12 @@ define i16 @or_i16_5(i16 %b) { ; LA32-LABEL: or_i16_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 5 -; 
LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i16_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i16 5, %b ret i16 %r @@ -157,13 +158,13 @@ define i16 @or_i16_0x1000(i16 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i16_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i16 4096, %b ret i16 %r @@ -173,12 +174,12 @@ define i16 @or_i16_0x10001(i16 %b) { ; LA32-LABEL: or_i16_0x10001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i16_0x10001: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i16 65537, %b ret i16 %r @@ -188,12 +189,12 @@ define i32 @or_i32_5(i32 %b) { ; LA32-LABEL: or_i32_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i32_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i32 5, %b ret i32 %r @@ -204,13 +205,13 @@ define i32 @or_i32_0x1000(i32 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i32_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i32 4096, %b ret i32 %r @@ -220,12 +221,12 @@ define i32 @or_i32_0x100000001(i32 %b) { ; LA32-LABEL: or_i32_0x100000001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i32_0x100000001: ; LA64: # %bb.0: # 
%entry ; LA64-NEXT: ori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i32 4294967297, %b ret i32 %r @@ -235,12 +236,12 @@ define i64 @or_i64_5(i64 %b) { ; LA32-LABEL: or_i64_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i64_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i64 5, %b ret i64 %r @@ -251,13 +252,13 @@ define i64 @or_i64_0x1000(i64 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a2, 1 ; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: or_i64_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = or i64 4096, %b ret i64 %r diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index 1f1a5c9b920cbdc0e6f3494467f04830bab87ff9..9c94bfeeadc06d4983407c801b87c096f3418e10 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -11,19 +11,19 @@ define i1 @sdiv_i1(i1 %a, i1 %b) { ; LA32-LABEL: sdiv_i1: ; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sdiv_i1: ; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_i1: ; LA32-TRAP: # %bb.0: # %entry -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: sdiv_i1: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: ret entry: %r = sdiv i1 %a, %b ret i1 %r @@ -35,32 +35,36 @@ define i8 @sdiv_i8(i8 %a, i8 %b) { ; LA32-NEXT: ext.w.b $a1, $a1 ; LA32-NEXT: ext.w.b $a0, $a0 ; LA32-NEXT: div.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; 
LA32-NEXT: ret ; ; LA64-LABEL: sdiv_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ext.w.b $a1, $a1 ; LA64-NEXT: ext.w.b $a0, $a0 ; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_i8: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: ext.w.b $a1, $a1 ; LA32-TRAP-NEXT: ext.w.b $a0, $a0 ; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB1_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB1_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: sdiv_i8: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: ext.w.b $a1, $a1 ; LA64-TRAP-NEXT: ext.w.b $a0, $a0 ; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB1_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB1_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = sdiv i8 %a, %b ret i8 %r @@ -72,32 +76,36 @@ define i16 @sdiv_i16(i16 %a, i16 %b) { ; LA32-NEXT: ext.w.h $a1, $a1 ; LA32-NEXT: ext.w.h $a0, $a0 ; LA32-NEXT: div.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sdiv_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ext.w.h $a1, $a1 ; LA64-NEXT: ext.w.h $a0, $a0 ; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_i16: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: ext.w.h $a1, $a1 ; LA32-TRAP-NEXT: ext.w.h $a0, $a0 ; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB2_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB2_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: sdiv_i16: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: ext.w.h $a1, $a1 ; LA64-TRAP-NEXT: ext.w.h $a0, $a0 ; LA64-TRAP-NEXT: div.d 
$a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB2_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB2_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = sdiv i16 %a, %b ret i16 %r @@ -107,30 +115,34 @@ define i32 @sdiv_i32(i32 %a, i32 %b) { ; LA32-LABEL: sdiv_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: div.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sdiv_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_i32: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: div.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB3_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB3_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: sdiv_i32: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB3_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB3_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = sdiv i32 %a, %b ret i32 %r @@ -143,15 +155,15 @@ define i64 @sdiv_i64(i64 %a, i64 %b) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: bl __divdi3 +; LA32-NEXT: bl %plt(__divdi3) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sdiv_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_i64: ; 
LA32-TRAP: # %bb.0: # %entry @@ -159,17 +171,19 @@ define i64 @sdiv_i64(i64 %a, i64 %b) { ; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 ; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-TRAP-NEXT: .cfi_offset 1, -4 -; LA32-TRAP-NEXT: bl __divdi3 +; LA32-TRAP-NEXT: bl %plt(__divdi3) ; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: sdiv_i64: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB4_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB4_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = sdiv i64 %a, %b ret i64 %r @@ -178,19 +192,19 @@ entry: define i1 @udiv_i1(i1 %a, i1 %b) { ; LA32-LABEL: udiv_i1: ; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: udiv_i1: ; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i1: ; LA32-TRAP: # %bb.0: # %entry -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: udiv_i1: ; LA64-TRAP: # %bb.0: # %entry -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: ret entry: %r = udiv i1 %a, %b ret i1 %r @@ -202,32 +216,36 @@ define i8 @udiv_i8(i8 %a, i8 %b) { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: div.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: udiv_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: div.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i8: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: andi $a1, $a1, 255 ; LA32-TRAP-NEXT: andi $a0, $a0, 255 ; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: 
bnez $a1, .LBB6_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB6_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: udiv_i8: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: andi $a1, $a1, 255 ; LA64-TRAP-NEXT: andi $a0, $a0, 255 ; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB6_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB6_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = udiv i8 %a, %b ret i8 %r @@ -239,32 +257,36 @@ define i16 @udiv_i16(i16 %a, i16 %b) { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: div.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: udiv_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: div.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i16: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB7_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB7_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: udiv_i16: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB7_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB7_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = udiv i16 %a, %b ret i16 %r @@ -274,30 +296,34 @@ define i32 @udiv_i32(i32 %a, i32 %b) 
{ ; LA32-LABEL: udiv_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: div.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: udiv_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: div.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i32: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB8_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB8_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: udiv_i32: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 ; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB8_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB8_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = udiv i32 %a, %b ret i32 %r @@ -310,15 +336,15 @@ define i64 @udiv_i64(i64 %a, i64 %b) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: bl __udivdi3 +; LA32-NEXT: bl %plt(__udivdi3) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: udiv_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: div.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: udiv_i64: ; LA32-TRAP: # %bb.0: # %entry @@ -326,17 +352,19 @@ define i64 @udiv_i64(i64 %a, i64 %b) { ; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 ; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-TRAP-NEXT: .cfi_offset 1, -4 -; LA32-TRAP-NEXT: bl __udivdi3 +; LA32-TRAP-NEXT: bl %plt(__udivdi3) ; 
LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: udiv_i64: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: div.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB9_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB9_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = udiv i64 %a, %b ret i64 %r @@ -346,22 +374,22 @@ define i1 @srem_i1(i1 %a, i1 %b) { ; LA32-LABEL: srem_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srem_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: srem_i1: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: move $a0, $zero -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: srem_i1: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: move $a0, $zero -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: ret entry: %r = srem i1 %a, %b ret i1 %r @@ -373,32 +401,36 @@ define i8 @srem_i8(i8 %a, i8 %b) { ; LA32-NEXT: ext.w.b $a1, $a1 ; LA32-NEXT: ext.w.b $a0, $a0 ; LA32-NEXT: mod.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srem_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ext.w.b $a1, $a1 ; LA64-NEXT: ext.w.b $a0, $a0 ; LA64-NEXT: mod.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: srem_i8: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: ext.w.b $a1, $a1 ; LA32-TRAP-NEXT: ext.w.b $a0, $a0 ; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB11_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB11_2: # %entry +; LA32-TRAP-NEXT: ret ; ; 
LA64-TRAP-LABEL: srem_i8: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: ext.w.b $a1, $a1 ; LA64-TRAP-NEXT: ext.w.b $a0, $a0 ; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB11_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB11_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = srem i8 %a, %b ret i8 %r @@ -410,32 +442,36 @@ define i16 @srem_i16(i16 %a, i16 %b) { ; LA32-NEXT: ext.w.h $a1, $a1 ; LA32-NEXT: ext.w.h $a0, $a0 ; LA32-NEXT: mod.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srem_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ext.w.h $a1, $a1 ; LA64-NEXT: ext.w.h $a0, $a0 ; LA64-NEXT: mod.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: srem_i16: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: ext.w.h $a1, $a1 ; LA32-TRAP-NEXT: ext.w.h $a0, $a0 ; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB12_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB12_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: srem_i16: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: ext.w.h $a1, $a1 ; LA64-TRAP-NEXT: ext.w.h $a0, $a0 ; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB12_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB12_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = srem i16 %a, %b ret i16 %r @@ -445,30 +481,34 @@ define i32 @srem_i32(i32 %a, i32 %b) { ; LA32-LABEL: srem_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mod.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srem_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: 
mod.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: srem_i32: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB13_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB13_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: srem_i32: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB13_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB13_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = srem i32 %a, %b ret i32 %r @@ -481,15 +521,15 @@ define i64 @srem_i64(i64 %a, i64 %b) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: bl __moddi3 +; LA32-NEXT: bl %plt(__moddi3) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srem_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mod.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: srem_i64: ; LA32-TRAP: # %bb.0: # %entry @@ -497,17 +537,19 @@ define i64 @srem_i64(i64 %a, i64 %b) { ; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 ; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-TRAP-NEXT: .cfi_offset 1, -4 -; LA32-TRAP-NEXT: bl __moddi3 +; LA32-TRAP-NEXT: bl %plt(__moddi3) ; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: srem_i64: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez 
$a1, .LBB14_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB14_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = srem i64 %a, %b ret i64 %r @@ -517,22 +559,22 @@ define i1 @urem_i1(i1 %a, i1 %b) { ; LA32-LABEL: urem_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: urem_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i1: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: move $a0, $zero -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: urem_i1: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: move $a0, $zero -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: ret entry: %r = urem i1 %a, %b ret i1 %r @@ -544,32 +586,36 @@ define i8 @urem_i8(i8 %a, i8 %b) { ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: mod.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: urem_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: mod.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i8: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: andi $a1, $a1, 255 ; LA32-TRAP-NEXT: andi $a0, $a0, 255 ; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB16_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB16_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: urem_i8: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: andi $a1, $a1, 255 ; LA64-TRAP-NEXT: andi $a0, $a0, 255 ; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB16_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB16_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = urem i8 %a, %b ret i8 %r @@ -581,32 +627,36 @@ define i16 @urem_i16(i16 %a, i16 %b) { ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: mod.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: urem_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: mod.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i16: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez $a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB17_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB17_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: urem_i16: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB17_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB17_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = urem i16 %a, %b ret i16 %r @@ -616,30 +666,34 @@ define i32 @urem_i32(i32 %a, i32 %b) { ; LA32-LABEL: urem_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: mod.wu $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: urem_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0 ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-NEXT: mod.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i32: ; LA32-TRAP: # %bb.0: # %entry ; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1 -; LA32-TRAP-NEXT: bnez 
$a1, 8 +; LA32-TRAP-NEXT: bnez $a1, .LBB18_2 +; LA32-TRAP-NEXT: # %bb.1: # %entry ; LA32-TRAP-NEXT: break 7 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: .LBB18_2: # %entry +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: urem_i32: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0 ; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0 ; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB18_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB18_2: # %entry +; LA64-TRAP-NEXT: ret entry: %r = urem i32 %a, %b ret i32 %r @@ -652,15 +706,15 @@ define i64 @urem_i64(i64 %a, i64 %b) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: bl __umoddi3 +; LA32-NEXT: bl %plt(__umoddi3) ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: urem_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: mod.du $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: urem_i64: ; LA32-TRAP: # %bb.0: # %entry @@ -668,17 +722,19 @@ define i64 @urem_i64(i64 %a, i64 %b) { ; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 ; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-TRAP-NEXT: .cfi_offset 1, -4 -; LA32-TRAP-NEXT: bl __umoddi3 +; LA32-TRAP-NEXT: bl %plt(__umoddi3) ; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 -; LA32-TRAP-NEXT: jirl $zero, $ra, 0 +; LA32-TRAP-NEXT: ret ; ; LA64-TRAP-LABEL: urem_i64: ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1 -; LA64-TRAP-NEXT: bnez $a1, 8 +; LA64-TRAP-NEXT: bnez $a1, .LBB19_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 -; LA64-TRAP-NEXT: jirl $zero, $ra, 0 +; LA64-TRAP-NEXT: .LBB19_2: # %entry +; LA64-TRAP-NEXT: 
ret entry: %r = urem i64 %a, %b ret i64 %r diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll index 4c6026aba5acf1c42a5341b3bad794e787638e2d..c26519de35cd9ec7ff3eb1ad30ad41f289bee9d7 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -10,14 +11,14 @@ define double @test(i1 %a, double %b, double %c) { ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: test: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, double %b, double %c ret double %res } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll index af4789b522586bf03dddf91f98e3e20ec5dcfa55..a625fd4789066723d3d74f5ff4173053f13d59ca 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 @@ -10,14 +11,14 @@ define float @test(i1 %a, float %b, float %c) { ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, 
$ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: test: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, float %b, float %c ret float %res } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll index 3481e79b248bd9eeeae112b31c0ac5afde9959d8..ddbc4ad719446c4aac19adea65f652c5b26f36bd 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -10,7 +11,7 @@ define i1 @bare_select_i1(i1 %a, i1 %b, i1 %c) { ; LA32-NEXT: masknez $a2, $a2, $a0 ; LA32-NEXT: maskeqz $a0, $a1, $a0 ; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bare_select_i1: ; LA64: # %bb.0: @@ -18,7 +19,7 @@ define i1 @bare_select_i1(i1 %a, i1 %b, i1 %c) { ; LA64-NEXT: masknez $a2, $a2, $a0 ; LA64-NEXT: maskeqz $a0, $a1, $a0 ; LA64-NEXT: or $a0, $a0, $a2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, i1 %b, i1 %c ret i1 %res } @@ -30,7 +31,7 @@ define i8 @bare_select_i8(i1 %a, i8 %b, i8 %c) { ; LA32-NEXT: masknez $a2, $a2, $a0 ; LA32-NEXT: maskeqz $a0, $a1, $a0 ; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bare_select_i8: ; LA64: # %bb.0: @@ -38,7 +39,7 @@ define i8 @bare_select_i8(i1 %a, i8 %b, i8 %c) { ; LA64-NEXT: masknez $a2, $a2, $a0 ; LA64-NEXT: maskeqz $a0, $a1, $a0 ; LA64-NEXT: or $a0, $a0, $a2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, i8 %b, i8 %c ret i8 %res } @@ -50,7 +51,7 @@ define i16 
@bare_select_i16(i1 %a, i16 %b, i16 %c) { ; LA32-NEXT: masknez $a2, $a2, $a0 ; LA32-NEXT: maskeqz $a0, $a1, $a0 ; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bare_select_i16: ; LA64: # %bb.0: @@ -58,7 +59,7 @@ define i16 @bare_select_i16(i1 %a, i16 %b, i16 %c) { ; LA64-NEXT: masknez $a2, $a2, $a0 ; LA64-NEXT: maskeqz $a0, $a1, $a0 ; LA64-NEXT: or $a0, $a0, $a2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, i16 %b, i16 %c ret i16 %res } @@ -70,7 +71,7 @@ define i32 @bare_select_i32(i1 %a, i32 %b, i32 %c) { ; LA32-NEXT: masknez $a2, $a2, $a0 ; LA32-NEXT: maskeqz $a0, $a1, $a0 ; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bare_select_i32: ; LA64: # %bb.0: @@ -78,7 +79,7 @@ define i32 @bare_select_i32(i1 %a, i32 %b, i32 %c) { ; LA64-NEXT: masknez $a2, $a2, $a0 ; LA64-NEXT: maskeqz $a0, $a1, $a0 ; LA64-NEXT: or $a0, $a0, $a2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, i32 %b, i32 %c ret i32 %res } @@ -93,7 +94,7 @@ define i64 @bare_select_i64(i1 %a, i64 %b, i64 %c) { ; LA32-NEXT: masknez $a1, $a4, $a5 ; LA32-NEXT: maskeqz $a2, $a2, $a5 ; LA32-NEXT: or $a1, $a2, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: bare_select_i64: ; LA64: # %bb.0: @@ -101,7 +102,7 @@ define i64 @bare_select_i64(i1 %a, i64 %b, i64 %c) { ; LA64-NEXT: masknez $a2, $a2, $a0 ; LA64-NEXT: maskeqz $a0, $a1, $a0 ; LA64-NEXT: or $a0, $a0, $a2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %res = select i1 %a, i64 %b, i64 %c ret i64 %res } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll index 4397b64d927beb0a4dbd6098eee3929a787a5891..8d26996b86fe030bea317dfd8552bd71dcd09661 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll @@ -1,3 +1,4 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define double @fcmp_false(double %a, double %b, double %x, double %y) { ; LA32-LABEL: fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: fmov.d $fa0, $fa3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: fmov.d $fa0, $fa3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -23,13 +24,13 @@ define double @fcmp_oeq(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oeq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -40,13 +41,13 @@ define double @fcmp_ogt(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ogt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -57,13 +58,13 @@ define double @fcmp_oge(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oge: ; LA64: # %bb.0: ; LA64-NEXT: 
fcmp.cle.d $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -74,13 +75,13 @@ define double @fcmp_olt(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_olt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -91,13 +92,13 @@ define double @fcmp_ole(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ole: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ole double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -108,13 +109,13 @@ define double @fcmp_one(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_one: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -125,13 +126,13 @@ define double @fcmp_ord(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; 
LA64-LABEL: fcmp_ord: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ord double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -142,13 +143,13 @@ define double @fcmp_ueq(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ueq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -159,13 +160,13 @@ define double @fcmp_ugt(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ugt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -176,13 +177,13 @@ define double @fcmp_uge(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -193,13 +194,13 @@ define double @fcmp_ult(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 
-; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ult: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -210,13 +211,13 @@ define double @fcmp_ule(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ule: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -227,13 +228,13 @@ define double @fcmp_une(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_une: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -244,13 +245,13 @@ define double @fcmp_uno(double %a, double %b, double %x, double %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uno: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res @@ -260,12 +261,12 @@ define double @fcmp_true(double %a, double %b, double %x, double %y) { ; LA32-LABEL: fcmp_true: ; LA32: # %bb.0: ; 
LA32-NEXT: fmov.d $fa0, $fa2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_true: ; LA64: # %bb.0: ; LA64-NEXT: fmov.d $fa0, $fa2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true double %a, %b %res = select i1 %cmp, double %x, double %y ret double %res diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll index 23d71493cb4be7af8e8f2b6614a15fff8e3622bc..1f6d2313ab72cb87d4ca25149cb763cc4481e283 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define float @fcmp_false(float %a, float %b, float %x, float %y) { ; LA32-LABEL: fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: fmov.s $fa0, $fa3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: fmov.s $fa0, $fa3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -23,13 +24,13 @@ define float @fcmp_oeq(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oeq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -40,13 +41,13 @@ define float @fcmp_ogt(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.s $fcc0, 
$fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ogt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -57,13 +58,13 @@ define float @fcmp_oge(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_oge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -74,13 +75,13 @@ define float @fcmp_olt(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_olt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -91,13 +92,13 @@ define float @fcmp_ole(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ole: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ole float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -108,13 +109,13 @@ define float @fcmp_one(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cne.s 
$fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_one: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -125,13 +126,13 @@ define float @fcmp_ord(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ord: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ord float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -142,13 +143,13 @@ define float @fcmp_ueq(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ueq: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -159,13 +160,13 @@ define float @fcmp_ugt(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ugt: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -176,13 +177,13 @@ define float @fcmp_uge(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; 
LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uge: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -193,13 +194,13 @@ define float @fcmp_ult(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ult: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -210,13 +211,13 @@ define float @fcmp_ule(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_ule: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -227,13 +228,13 @@ define float @fcmp_une(float %a, float %b, float %x, float %y) { ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_une: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -244,13 +245,13 @@ define float @fcmp_uno(float %a, float %b, float %x, float %y) 
{ ; LA32: # %bb.0: ; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 ; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_uno: ; LA64: # %bb.0: ; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 ; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res @@ -260,12 +261,12 @@ define float @fcmp_true(float %a, float %b, float %x, float %y) { ; LA32-LABEL: fcmp_true: ; LA32: # %bb.0: ; LA32-NEXT: fmov.s $fa0, $fa2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: fcmp_true: ; LA64: # %bb.0: ; LA64-NEXT: fmov.s $fa0, $fa2 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true float %a, %b %res = select i1 %cmp, float %x, float %y ret float %res diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll index 9e742ee576cbbf4fcdea1b6a5f45d1225313894b..3e88181a11fe82e3590cf309812c0f6fb44ef279 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i32 @f32_fcmp_false(float %a, float %b, i32 %x, i32 %y) { ; LA32-LABEL: f32_fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -26,7 +27,7 @@ define i32 @f32_fcmp_oeq(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez 
$a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_oeq: ; LA64: # %bb.0: @@ -35,7 +36,7 @@ define i32 @f32_fcmp_oeq(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -49,7 +50,7 @@ define i32 @f32_fcmp_ogt(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ogt: ; LA64: # %bb.0: @@ -58,7 +59,7 @@ define i32 @f32_fcmp_ogt(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -72,7 +73,7 @@ define i32 @f32_fcmp_oge(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_oge: ; LA64: # %bb.0: @@ -81,7 +82,7 @@ define i32 @f32_fcmp_oge(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -95,7 +96,7 @@ define i32 @f32_fcmp_olt(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_olt: ; LA64: # %bb.0: @@ -104,7 +105,7 @@ define i32 
@f32_fcmp_olt(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -118,7 +119,7 @@ define i32 @f32_fcmp_ole(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ole: ; LA64: # %bb.0: @@ -127,7 +128,7 @@ define i32 @f32_fcmp_ole(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ole float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -141,7 +142,7 @@ define i32 @f32_fcmp_one(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_one: ; LA64: # %bb.0: @@ -150,7 +151,7 @@ define i32 @f32_fcmp_one(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -164,7 +165,7 @@ define i32 @f32_fcmp_ord(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ord: ; LA64: # %bb.0: @@ -173,7 +174,7 @@ define i32 @f32_fcmp_ord(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp 
ord float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -187,7 +188,7 @@ define i32 @f32_fcmp_ueq(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ueq: ; LA64: # %bb.0: @@ -196,7 +197,7 @@ define i32 @f32_fcmp_ueq(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -210,7 +211,7 @@ define i32 @f32_fcmp_ugt(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ugt: ; LA64: # %bb.0: @@ -219,7 +220,7 @@ define i32 @f32_fcmp_ugt(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -233,7 +234,7 @@ define i32 @f32_fcmp_uge(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_uge: ; LA64: # %bb.0: @@ -242,7 +243,7 @@ define i32 @f32_fcmp_uge(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -256,7 +257,7 @@ define i32 @f32_fcmp_ult(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, 
$a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ult: ; LA64: # %bb.0: @@ -265,7 +266,7 @@ define i32 @f32_fcmp_ult(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -279,7 +280,7 @@ define i32 @f32_fcmp_ule(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_ule: ; LA64: # %bb.0: @@ -288,7 +289,7 @@ define i32 @f32_fcmp_ule(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -302,7 +303,7 @@ define i32 @f32_fcmp_une(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_une: ; LA64: # %bb.0: @@ -311,7 +312,7 @@ define i32 @f32_fcmp_une(float %a, float %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -325,7 +326,7 @@ define i32 @f32_fcmp_uno(float %a, float %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_uno: ; LA64: # %bb.0: @@ -334,7 +335,7 @@ define i32 @f32_fcmp_uno(float %a, float %b, i32 %x, i32 
%y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -343,11 +344,11 @@ define i32 @f32_fcmp_uno(float %a, float %b, i32 %x, i32 %y) { define i32 @f32_fcmp_true(float %a, float %b, i32 %x, i32 %y) { ; LA32-LABEL: f32_fcmp_true: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f32_fcmp_true: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true float %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -357,12 +358,12 @@ define i32 @f64_fcmp_false(double %a, double %b, i32 %x, i32 %y) { ; LA32-LABEL: f64_fcmp_false: ; LA32: # %bb.0: ; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_false: ; LA64: # %bb.0: ; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp false double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -376,7 +377,7 @@ define i32 @f64_fcmp_oeq(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_oeq: ; LA64: # %bb.0: @@ -385,7 +386,7 @@ define i32 @f64_fcmp_oeq(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oeq double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -399,7 +400,7 @@ define i32 @f64_fcmp_ogt(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ogt: ; LA64: # %bb.0: @@ -408,7 +409,7 @@ define 
i32 @f64_fcmp_ogt(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ogt double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -422,7 +423,7 @@ define i32 @f64_fcmp_oge(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_oge: ; LA64: # %bb.0: @@ -431,7 +432,7 @@ define i32 @f64_fcmp_oge(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp oge double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -445,7 +446,7 @@ define i32 @f64_fcmp_olt(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_olt: ; LA64: # %bb.0: @@ -454,7 +455,7 @@ define i32 @f64_fcmp_olt(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp olt double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -468,7 +469,7 @@ define i32 @f64_fcmp_ole(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ole: ; LA64: # %bb.0: @@ -477,7 +478,7 @@ define i32 @f64_fcmp_ole(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; 
LA64-NEXT: ret %cmp = fcmp ole double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -491,7 +492,7 @@ define i32 @f64_fcmp_one(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_one: ; LA64: # %bb.0: @@ -500,7 +501,7 @@ define i32 @f64_fcmp_one(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp one double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -514,7 +515,7 @@ define i32 @f64_fcmp_ord(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ord: ; LA64: # %bb.0: @@ -523,7 +524,7 @@ define i32 @f64_fcmp_ord(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ord double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -537,7 +538,7 @@ define i32 @f64_fcmp_ueq(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ueq: ; LA64: # %bb.0: @@ -546,7 +547,7 @@ define i32 @f64_fcmp_ueq(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ueq double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -560,7 +561,7 @@ define i32 @f64_fcmp_ugt(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: 
masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ugt: ; LA64: # %bb.0: @@ -569,7 +570,7 @@ define i32 @f64_fcmp_ugt(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ugt double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -583,7 +584,7 @@ define i32 @f64_fcmp_uge(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_uge: ; LA64: # %bb.0: @@ -592,7 +593,7 @@ define i32 @f64_fcmp_uge(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uge double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -606,7 +607,7 @@ define i32 @f64_fcmp_ult(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ult: ; LA64: # %bb.0: @@ -615,7 +616,7 @@ define i32 @f64_fcmp_ult(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ult double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -629,7 +630,7 @@ define i32 @f64_fcmp_ule(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_ule: ; LA64: # %bb.0: @@ -638,7 
+639,7 @@ define i32 @f64_fcmp_ule(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp ule double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -652,7 +653,7 @@ define i32 @f64_fcmp_une(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_une: ; LA64: # %bb.0: @@ -661,7 +662,7 @@ define i32 @f64_fcmp_une(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp une double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -675,7 +676,7 @@ define i32 @f64_fcmp_uno(double %a, double %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a1, $a2 ; LA32-NEXT: maskeqz $a0, $a0, $a2 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_uno: ; LA64: # %bb.0: @@ -684,7 +685,7 @@ define i32 @f64_fcmp_uno(double %a, double %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a1, $a2 ; LA64-NEXT: maskeqz $a0, $a0, $a2 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp uno double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res @@ -693,11 +694,11 @@ define i32 @f64_fcmp_uno(double %a, double %b, i32 %x, i32 %y) { define i32 @f64_fcmp_true(double %a, double %b, i32 %x, i32 %y) { ; LA32-LABEL: f64_fcmp_true: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: f64_fcmp_true: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cmp = fcmp true double %a, %b %res = select i1 %cmp, i32 %x, i32 %y ret i32 %res diff --git 
a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll index 5ccee6b193b0dae021975810096e561e3b95273e..d8b0ecfd5dac05f7ddca0940f7b369010b7d0bcf 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 @@ -10,7 +11,7 @@ define double @select_eq(i32 signext %a, i32 signext %b, double %x, double %y) { ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_eq: ; LA64: # %bb.0: @@ -18,7 +19,7 @@ define double @select_eq(i32 signext %a, i32 signext %b, double %x, double %y) { ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp eq i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -31,7 +32,7 @@ define double @select_ne(i32 signext %a, i32 signext %b, double %x, double %y) { ; LA32-NEXT: sltu $a0, $zero, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ne: ; LA64: # %bb.0: @@ -39,7 +40,7 @@ define double @select_ne(i32 signext %a, i32 signext %b, double %x, double %y) { ; LA64-NEXT: sltu $a0, $zero, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ne i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -51,14 +52,14 @@ define double @select_ugt(i32 signext %a, i32 signext %b, double 
%x, double %y) ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ugt: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ugt i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -71,7 +72,7 @@ define double @select_uge(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_uge: ; LA64: # %bb.0: @@ -79,7 +80,7 @@ define double @select_uge(i32 signext %a, i32 signext %b, double %x, double %y) ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp uge i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -91,14 +92,14 @@ define double @select_ult(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ult: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ult i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -111,7 +112,7 @@ define double @select_ule(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ule: ; LA64: # %bb.0: @@ -119,7 +120,7 @@ define double 
@select_ule(i32 signext %a, i32 signext %b, double %x, double %y) ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ule i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -131,14 +132,14 @@ define double @select_sgt(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: slt $a0, $a1, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sgt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a1, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sgt i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -151,7 +152,7 @@ define double @select_sge(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sge: ; LA64: # %bb.0: @@ -159,7 +160,7 @@ define double @select_sge(i32 signext %a, i32 signext %b, double %x, double %y) ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sge i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res @@ -171,14 +172,14 @@ define double @select_slt(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: slt $a0, $a0, $a1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_slt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a0, $a1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp slt i32 %a, %b 
%res = select i1 %cond, double %x, double %y ret double %res @@ -191,7 +192,7 @@ define double @select_sle(i32 signext %a, i32 signext %b, double %x, double %y) ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sle: ; LA64: # %bb.0: @@ -199,7 +200,7 @@ define double @select_sle(i32 signext %a, i32 signext %b, double %x, double %y) ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sle i32 %a, %b %res = select i1 %cond, double %x, double %y ret double %res diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll index 98b999776e3f647f7ed7207753e2fd26b2a8cdd6..8870e78edf6e3de3cb124ab61de0ae64b3814dbc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 @@ -10,7 +11,7 @@ define float @select_eq(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_eq: ; LA64: # %bb.0: @@ -18,7 +19,7 @@ define float @select_eq(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp eq i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -31,7 +32,7 @@ 
define float @select_ne(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: sltu $a0, $zero, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ne: ; LA64: # %bb.0: @@ -39,7 +40,7 @@ define float @select_ne(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: sltu $a0, $zero, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ne i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -51,14 +52,14 @@ define float @select_ugt(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ugt: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ugt i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -71,7 +72,7 @@ define float @select_uge(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_uge: ; LA64: # %bb.0: @@ -79,7 +80,7 @@ define float @select_uge(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp uge i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -91,14 +92,14 @@ define float @select_ult(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, 
$fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ult: ; LA64: # %bb.0: ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ult i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -111,7 +112,7 @@ define float @select_ule(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ule: ; LA64: # %bb.0: @@ -119,7 +120,7 @@ define float @select_ule(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ule i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -131,14 +132,14 @@ define float @select_sgt(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: slt $a0, $a1, $a0 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sgt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a1, $a0 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sgt i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -151,7 +152,7 @@ define float @select_sge(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sge: ; LA64: # %bb.0: @@ -159,7 +160,7 @@ define float @select_sge(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, 
$a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sge i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -171,14 +172,14 @@ define float @select_slt(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: slt $a0, $a0, $a1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_slt: ; LA64: # %bb.0: ; LA64-NEXT: slt $a0, $a0, $a1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp slt i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res @@ -191,7 +192,7 @@ define float @select_sle(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: movgr2cf $fcc0, $a0 ; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sle: ; LA64: # %bb.0: @@ -199,7 +200,7 @@ define float @select_sle(i32 signext %a, i32 signext %b, float %x, float %y) { ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: movgr2cf $fcc0, $a0 ; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sle i32 %a, %b %res = select i1 %cond, float %x, float %y ret float %res diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll index 3b7c2adfb868b3923cc7d0d915d27348f8cb8113..0acf31f8bb1abfc1395876d1ab7c6dc3135c74b3 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -11,7 +12,7 @@ 
define i32 @select_eq(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_eq: ; LA64: # %bb.0: @@ -20,7 +21,7 @@ define i32 @select_eq(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp eq i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -34,7 +35,7 @@ define i32 @select_ne(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ne: ; LA64: # %bb.0: @@ -43,7 +44,7 @@ define i32 @select_ne(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ne i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -56,7 +57,7 @@ define i32 @select_ugt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ugt: ; LA64: # %bb.0: @@ -64,7 +65,7 @@ define i32 @select_ugt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ugt i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -78,7 +79,7 @@ define i32 @select_uge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; 
LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_uge: ; LA64: # %bb.0: @@ -87,7 +88,7 @@ define i32 @select_uge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp uge i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -100,7 +101,7 @@ define i32 @select_ult(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ult: ; LA64: # %bb.0: @@ -108,7 +109,7 @@ define i32 @select_ult(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ult i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -122,7 +123,7 @@ define i32 @select_ule(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_ule: ; LA64: # %bb.0: @@ -131,7 +132,7 @@ define i32 @select_ule(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp ule i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -144,7 +145,7 @@ define i32 @select_sgt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sgt: ; LA64: # %bb.0: @@ -152,7 +153,7 @@ define i32 @select_sgt(i32 signext %a, i32 
signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sgt i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -166,7 +167,7 @@ define i32 @select_sge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sge: ; LA64: # %bb.0: @@ -175,7 +176,7 @@ define i32 @select_sge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp sge i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -188,7 +189,7 @@ define i32 @select_slt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_slt: ; LA64: # %bb.0: @@ -196,7 +197,7 @@ define i32 @select_slt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %cond = icmp slt i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res @@ -210,7 +211,7 @@ define i32 @select_sle(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA32-NEXT: masknez $a1, $a3, $a0 ; LA32-NEXT: maskeqz $a0, $a2, $a0 ; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: select_sle: ; LA64: # %bb.0: @@ -219,7 +220,7 @@ define i32 @select_sle(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { ; LA64-NEXT: masknez $a1, $a3, $a0 ; LA64-NEXT: maskeqz $a0, $a2, $a0 ; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 
0 +; LA64-NEXT: ret %cond = icmp sle i32 %a, %b %res = select i1 %cond, i32 %x, i32 %y ret i32 %res diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll index 911751bc65525947474f57d3937fb1016d075b9e..7053d53408961ba728ac1184e2559f8ee8159d56 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -8,13 +9,13 @@ define i8 @sext_i1_to_i8(i1 %a) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i1_to_i8: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i1 %a to i8 ret i8 %1 } @@ -24,13 +25,13 @@ define i16 @sext_i1_to_i16(i1 %a) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i1_to_i16: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i1 %a to i16 ret i16 %1 } @@ -40,13 +41,13 @@ define i32 @sext_i1_to_i32(i1 %a) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i1_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i1 %a to i32 ret i32 %1 } @@ -57,13 +58,13 @@ define i64 @sext_i1_to_i64(i1 %a) { ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: sub.w $a0, $zero, $a0 ; 
LA32-NEXT: move $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i1_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i1 %a to i64 ret i64 %1 } @@ -72,12 +73,12 @@ define i16 @sext_i8_to_i16(i8 %a) { ; LA32-LABEL: sext_i8_to_i16: ; LA32: # %bb.0: ; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i8_to_i16: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i8 %a to i16 ret i16 %1 } @@ -86,12 +87,12 @@ define i32 @sext_i8_to_i32(i8 %a) { ; LA32-LABEL: sext_i8_to_i32: ; LA32: # %bb.0: ; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i8_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i8 %a to i32 ret i32 %1 } @@ -101,12 +102,12 @@ define i64 @sext_i8_to_i64(i8 %a) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.b $a0, $a0 ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i8_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i8 %a to i64 ret i64 %1 } @@ -115,12 +116,12 @@ define i32 @sext_i16_to_i32(i16 %a) { ; LA32-LABEL: sext_i16_to_i32: ; LA32: # %bb.0: ; LA32-NEXT: ext.w.h $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i16_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i16 %a to i32 ret i32 %1 } @@ -130,12 +131,12 @@ define i64 @sext_i16_to_i64(i16 %a) { ; LA32: # %bb.0: ; LA32-NEXT: ext.w.h $a0, $a0 ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i16_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 +; 
LA64-NEXT: ret %1 = sext i16 %a to i64 ret i64 %1 } @@ -144,12 +145,12 @@ define i64 @sext_i32_to_i64(i32 %a) { ; LA32-LABEL: sext_i32_to_i64: ; LA32: # %bb.0: ; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sext_i32_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = sext i32 %a to i64 ret i64 %1 } @@ -158,12 +159,12 @@ define i8 @zext_i1_to_i8(i1 %a) { ; LA32-LABEL: zext_i1_to_i8: ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i1_to_i8: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i1 %a to i8 ret i8 %1 } @@ -172,12 +173,12 @@ define i16 @zext_i1_to_i16(i1 %a) { ; LA32-LABEL: zext_i1_to_i16: ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i1_to_i16: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i1 %a to i16 ret i16 %1 } @@ -186,12 +187,12 @@ define i32 @zext_i1_to_i32(i1 %a) { ; LA32-LABEL: zext_i1_to_i32: ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i1_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i1 %a to i32 ret i32 %1 } @@ -201,12 +202,12 @@ define i64 @zext_i1_to_i64(i1 %a) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i1_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i1 %a to i64 ret i64 %1 } @@ -215,12 +216,12 @@ define i16 @zext_i8_to_i16(i8 %a) { ; LA32-LABEL: zext_i8_to_i16: ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: 
ret ; ; LA64-LABEL: zext_i8_to_i16: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i8 %a to i16 ret i16 %1 } @@ -229,12 +230,12 @@ define i32 @zext_i8_to_i32(i8 %a) { ; LA32-LABEL: zext_i8_to_i32: ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i8_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i8 %a to i32 ret i32 %1 } @@ -244,12 +245,12 @@ define i64 @zext_i8_to_i64(i8 %a) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i8_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i8 %a to i64 ret i64 %1 } @@ -258,12 +259,12 @@ define i32 @zext_i16_to_i32(i16 %a) { ; LA32-LABEL: zext_i16_to_i32: ; LA32: # %bb.0: ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i16_to_i32: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i16 %a to i32 ret i32 %1 } @@ -273,12 +274,12 @@ define i64 @zext_i16_to_i64(i16 %a) { ; LA32: # %bb.0: ; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i16_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i16 %a to i64 ret i64 %1 } @@ -287,12 +288,12 @@ define i64 @zext_i32_to_i64(i32 %a) { ; LA32-LABEL: zext_i32_to_i64: ; LA32: # %bb.0: ; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: zext_i32_to_i64: ; LA64: # %bb.0: ; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = zext i32 %a to i64 ret i64 %1 
} @@ -300,11 +301,11 @@ define i64 @zext_i32_to_i64(i32 %a) { define i1 @trunc_i8_to_i1(i8 %a) { ; LA32-LABEL: trunc_i8_to_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i8_to_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i8 %a to i1 ret i1 %1 } @@ -312,11 +313,11 @@ define i1 @trunc_i8_to_i1(i8 %a) { define i1 @trunc_i16_to_i1(i16 %a) { ; LA32-LABEL: trunc_i16_to_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i16_to_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i16 %a to i1 ret i1 %1 } @@ -324,11 +325,11 @@ define i1 @trunc_i16_to_i1(i16 %a) { define i1 @trunc_i32_to_i1(i32 %a) { ; LA32-LABEL: trunc_i32_to_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i32_to_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i32 %a to i1 ret i1 %1 } @@ -336,11 +337,11 @@ define i1 @trunc_i32_to_i1(i32 %a) { define i1 @trunc_i64_to_i1(i64 %a) { ; LA32-LABEL: trunc_i64_to_i1: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i64_to_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i64 %a to i1 ret i1 %1 } @@ -348,11 +349,11 @@ define i1 @trunc_i64_to_i1(i64 %a) { define i8 @trunc_i16_to_i8(i16 %a) { ; LA32-LABEL: trunc_i16_to_i8: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i16_to_i8: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i16 %a to i8 ret i8 %1 } @@ -360,11 +361,11 @@ define i8 @trunc_i16_to_i8(i16 %a) { define i8 @trunc_i32_to_i8(i32 %a) { ; LA32-LABEL: trunc_i32_to_i8: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i32_to_i8: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i32 %a to i8 ret i8 %1 } @@ -372,11 
+373,11 @@ define i8 @trunc_i32_to_i8(i32 %a) { define i8 @trunc_i64_to_i8(i64 %a) { ; LA32-LABEL: trunc_i64_to_i8: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i64_to_i8: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i64 %a to i8 ret i8 %1 } @@ -384,11 +385,11 @@ define i8 @trunc_i64_to_i8(i64 %a) { define i16 @trunc_i32_to_i16(i32 %a) { ; LA32-LABEL: trunc_i32_to_i16: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i32_to_i16: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i32 %a to i16 ret i16 %1 } @@ -396,11 +397,11 @@ define i16 @trunc_i32_to_i16(i32 %a) { define i16 @trunc_i64_to_i16(i64 %a) { ; LA32-LABEL: trunc_i64_to_i16: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i64_to_i16: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i64 %a to i16 ret i16 %1 } @@ -408,11 +409,11 @@ define i16 @trunc_i64_to_i16(i64 %a) { define i32 @trunc_i64_to_i32(i64 %a) { ; LA32-LABEL: trunc_i64_to_i32: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: trunc_i64_to_i32: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = trunc i64 %a to i32 ret i32 %1 } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll index de25040452b126ff63ef47abe78805a52dd43fe6..4baf18931dc57c419723e34bae221c2149ad7269 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -6,11 +7,11 @@ define i1 @shl_i1(i1 %x, i1 %y) { ; LA32-LABEL: shl_i1: ; LA32: # %bb.0: -; 
LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i1: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i1 %x, %y ret i1 %shl } @@ -19,12 +20,12 @@ define i8 @shl_i8(i8 %x, i8 %y) { ; LA32-LABEL: shl_i8: ; LA32: # %bb.0: ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i8: ; LA64: # %bb.0: ; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i8 %x, %y ret i8 %shl } @@ -33,12 +34,12 @@ define i16 @shl_i16(i16 %x, i16 %y) { ; LA32-LABEL: shl_i16: ; LA32: # %bb.0: ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i16: ; LA64: # %bb.0: ; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i16 %x, %y ret i16 %shl } @@ -47,12 +48,12 @@ define i32 @shl_i32(i32 %x, i32 %y) { ; LA32-LABEL: shl_i32: ; LA32: # %bb.0: ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i32: ; LA64: # %bb.0: ; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i32 %x, %y ret i32 %shl } @@ -74,12 +75,12 @@ define i64 @shl_i64(i64 %x, i64 %y) { ; LA32-NEXT: sll.w $a0, $a0, $a2 ; LA32-NEXT: srai.w $a2, $a3, 31 ; LA32-NEXT: and $a0, $a2, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i64: ; LA64: # %bb.0: ; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i64 %x, %y ret i64 %shl } @@ -87,11 +88,11 @@ define i64 @shl_i64(i64 %x, i64 %y) { define i1 @shl_i1_3(i1 %x) { ; LA32-LABEL: shl_i1_3: ; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i1_3: ; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i1 %x, 3 ret i1 %shl } @@ -100,12 +101,12 @@ define i8 @shl_i8_3(i8 %x) { ; LA32-LABEL: shl_i8_3: ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a0, $a0, 
3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i8_3: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i8 %x, 3 ret i8 %shl } @@ -114,12 +115,12 @@ define i16 @shl_i16_3(i16 %x) { ; LA32-LABEL: shl_i16_3: ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i16_3: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i16 %x, 3 ret i16 %shl } @@ -128,12 +129,12 @@ define i32 @shl_i32_3(i32 %x) { ; LA32-LABEL: shl_i32_3: ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i32_3: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i32 %x, 3 ret i32 %shl } @@ -145,12 +146,12 @@ define i64 @shl_i64_3(i64 %x) { ; LA32-NEXT: srli.w $a2, $a0, 29 ; LA32-NEXT: or $a1, $a1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: shl_i64_3: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %shl = shl i64 %x, 3 ret i64 %shl } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll index dfa55c29ebaedc02cb48a54ac631af15a03187a0..98357744f5219f508e961026eea72baffb5f2052 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i1 @sub_i1(i1 %x, i1 %y) { ; LA32-LABEL: sub_i1: ; LA32: # %bb.0: ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret 
; ; LA64-LABEL: sub_i1: ; LA64: # %bb.0: ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i1 %x, %y ret i1 %sub } @@ -21,12 +22,12 @@ define i8 @sub_i8(i8 %x, i8 %y) { ; LA32-LABEL: sub_i8: ; LA32: # %bb.0: ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sub_i8: ; LA64: # %bb.0: ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i8 %x, %y ret i8 %sub } @@ -35,12 +36,12 @@ define i16 @sub_i16(i16 %x, i16 %y) { ; LA32-LABEL: sub_i16: ; LA32: # %bb.0: ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sub_i16: ; LA64: # %bb.0: ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i16 %x, %y ret i16 %sub } @@ -49,12 +50,12 @@ define i32 @sub_i32(i32 %x, i32 %y) { ; LA32-LABEL: sub_i32: ; LA32: # %bb.0: ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sub_i32: ; LA64: # %bb.0: ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i32 %x, %y ret i32 %sub } @@ -65,12 +66,12 @@ define signext i32 @sub_i32_sext(i32 %x, i32 %y) { ; LA32-LABEL: sub_i32_sext: ; LA32: # %bb.0: ; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sub_i32_sext: ; LA64: # %bb.0: ; LA64-NEXT: sub.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i32 %x, %y ret i32 %sub } @@ -82,12 +83,12 @@ define i64 @sub_i64(i64 %x, i64 %y) { ; LA32-NEXT: sltu $a3, $a0, $a2 ; LA32-NEXT: sub.w $a1, $a1, $a3 ; LA32-NEXT: sub.w $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sub_i64: ; LA64: # %bb.0: ; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %sub = sub i64 %x, %y ret i64 %sub } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll index 2f85e645c04f72548b9b52f662c567b1f2e33424..373c9cf4b64e9a2d2fe9316737ca44b1036e3664 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -7,12 +8,12 @@ define i1 @xor_i1(i1 %a, i1 %b) { ; LA32-LABEL: xor_i1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i1 %a, %b ret i1 %r @@ -22,12 +23,12 @@ define i8 @xor_i8(i8 %a, i8 %b) { ; LA32-LABEL: xor_i8: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i8: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i8 %a, %b ret i8 %r @@ -37,12 +38,12 @@ define i16 @xor_i16(i16 %a, i16 %b) { ; LA32-LABEL: xor_i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i16 %a, %b ret i16 %r @@ -52,12 +53,12 @@ define i32 @xor_i32(i32 %a, i32 %b) { ; LA32-LABEL: xor_i32: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i32: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i32 %a, %b ret i32 %r @@ -68,12 +69,12 @@ define i64 @xor_i64(i64 %a, i64 %b) { ; LA32: # %bb.0: # 
%entry ; LA32-NEXT: xor $a0, $a0, $a2 ; LA32-NEXT: xor $a1, $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i64 %a, %b ret i64 %r @@ -82,11 +83,11 @@ entry: define i1 @xor_i1_0(i1 %b) { ; LA32-LABEL: xor_i1_0: ; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i1_0: ; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i1 4, %b ret i1 %r @@ -96,12 +97,12 @@ define i1 @xor_i1_5(i1 %b) { ; LA32-LABEL: xor_i1_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i1_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i1 5, %b ret i1 %r @@ -111,12 +112,12 @@ define i8 @xor_i8_5(i8 %b) { ; LA32-LABEL: xor_i8_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i8_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i8 5, %b ret i8 %r @@ -126,12 +127,12 @@ define i8 @xor_i8_257(i8 %b) { ; LA32-LABEL: xor_i8_257: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i8_257: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i8 257, %b ret i8 %r @@ -141,12 +142,12 @@ define i16 @xor_i16_5(i16 %b) { ; LA32-LABEL: xor_i16_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i16_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i16 5, %b ret i16 %r 
@@ -157,13 +158,13 @@ define i16 @xor_i16_0x1000(i16 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i16_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i16 4096, %b ret i16 %r @@ -173,12 +174,12 @@ define i16 @xor_i16_0x10001(i16 %b) { ; LA32-LABEL: xor_i16_0x10001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i16_0x10001: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i16 65537, %b ret i16 %r @@ -188,12 +189,12 @@ define i32 @xor_i32_5(i32 %b) { ; LA32-LABEL: xor_i32_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i32_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i32 5, %b ret i32 %r @@ -204,13 +205,13 @@ define i32 @xor_i32_0x1000(i32 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a1, 1 ; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i32_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i32 4096, %b ret i32 %r @@ -220,12 +221,12 @@ define i32 @xor_i32_0x100000001(i32 %b) { ; LA32-LABEL: xor_i32_0x100000001: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i32_0x100000001: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i32 4294967297, %b ret i32 %r @@ -235,12 +236,12 @@ define i64 @xor_i64_5(i64 %b) { ; 
LA32-LABEL: xor_i64_5: ; LA32: # %bb.0: # %entry ; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i64_5: ; LA64: # %bb.0: # %entry ; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i64 5, %b ret i64 %r @@ -251,13 +252,13 @@ define i64 @xor_i64_0x1000(i64 %b) { ; LA32: # %bb.0: # %entry ; LA32-NEXT: lu12i.w $a2, 1 ; LA32-NEXT: xor $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: xor_i64_0x1000: ; LA64: # %bb.0: # %entry ; LA64-NEXT: lu12i.w $a1, 1 ; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret entry: %r = xor i64 4096, %b ret i64 %r diff --git a/llvm/test/CodeGen/LoongArch/jump-table.ll b/llvm/test/CodeGen/LoongArch/jump-table.ll new file mode 100644 index 0000000000000000000000000000000000000000..8bd4c952cf1eee47352966f8d5b9694508f79c5f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/jump-table.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --min-jump-table-entries=5 < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --min-jump-table-entries=5 < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --min-jump-table-entries=4 < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-JT +; RUN: llc --mtriple=loongarch64 --min-jump-table-entries=4 < %s \ +; RUN: | FileCheck %s --check-prefix=LA64-JT + +;; The default mininum number of entries to use a jump table is 4. +;; +;; Note: The parameter `--min-jump-table-entries` will have no effect once we +;; have set the default value using `setMinimumJumpTableEntries`. 
+ +define void @switch_4_arms(i32 %in, ptr %out) nounwind { +; LA32-LABEL: switch_4_arms: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a2, $zero, 2 +; LA32-NEXT: blt $a2, $a0, .LBB0_4 +; LA32-NEXT: # %bb.1: # %entry +; LA32-NEXT: ori $a3, $zero, 1 +; LA32-NEXT: beq $a0, $a3, .LBB0_7 +; LA32-NEXT: # %bb.2: # %entry +; LA32-NEXT: bne $a0, $a2, .LBB0_9 +; LA32-NEXT: # %bb.3: # %bb2 +; LA32-NEXT: ori $a0, $zero, 3 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_4: # %entry +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: beq $a0, $a3, .LBB0_8 +; LA32-NEXT: # %bb.5: # %entry +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bne $a0, $a2, .LBB0_9 +; LA32-NEXT: # %bb.6: # %bb4 +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_7: # %bb1 +; LA32-NEXT: ori $a0, $zero, 4 +; LA32-NEXT: st.w $a0, $a1, 0 +; LA32-NEXT: ret +; LA32-NEXT: .LBB0_8: # %bb3 +; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: .LBB0_9: # %exit +; LA32-NEXT: ret +; +; LA64-LABEL: switch_4_arms: +; LA64: # %bb.0: # %entry +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: ori $a2, $zero, 2 +; LA64-NEXT: blt $a2, $a0, .LBB0_4 +; LA64-NEXT: # %bb.1: # %entry +; LA64-NEXT: ori $a3, $zero, 1 +; LA64-NEXT: beq $a0, $a3, .LBB0_7 +; LA64-NEXT: # %bb.2: # %entry +; LA64-NEXT: bne $a0, $a2, .LBB0_9 +; LA64-NEXT: # %bb.3: # %bb2 +; LA64-NEXT: ori $a0, $zero, 3 +; LA64-NEXT: st.w $a0, $a1, 0 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_4: # %entry +; LA64-NEXT: ori $a3, $zero, 3 +; LA64-NEXT: beq $a0, $a3, .LBB0_8 +; LA64-NEXT: # %bb.5: # %entry +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: bne $a0, $a2, .LBB0_9 +; LA64-NEXT: # %bb.6: # %bb4 +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: st.w $a0, $a1, 0 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_7: # %bb1 +; LA64-NEXT: ori $a0, $zero, 4 +; LA64-NEXT: st.w $a0, $a1, 0 +; LA64-NEXT: ret +; LA64-NEXT: .LBB0_8: # %bb3 +; LA64-NEXT: st.w $a2, $a1, 0 +; LA64-NEXT: .LBB0_9: # %exit +; LA64-NEXT: ret +; +; LA32-JT-LABEL: 
switch_4_arms: +; LA32-JT: # %bb.0: # %entry +; LA32-JT-NEXT: addi.w $a2, $a0, -1 +; LA32-JT-NEXT: ori $a0, $zero, 3 +; LA32-JT-NEXT: bltu $a0, $a2, .LBB0_6 +; LA32-JT-NEXT: # %bb.1: # %entry +; LA32-JT-NEXT: pcalau12i $a3, %pc_hi20(.LJTI0_0) +; LA32-JT-NEXT: addi.w $a3, $a3, %pc_lo12(.LJTI0_0) +; LA32-JT-NEXT: alsl.w $a2, $a2, $a3, 2 +; LA32-JT-NEXT: ld.w $a2, $a2, 0 +; LA32-JT-NEXT: jr $a2 +; LA32-JT-NEXT: .LBB0_2: # %bb1 +; LA32-JT-NEXT: ori $a0, $zero, 4 +; LA32-JT-NEXT: b .LBB0_5 +; LA32-JT-NEXT: .LBB0_3: # %bb3 +; LA32-JT-NEXT: ori $a0, $zero, 2 +; LA32-JT-NEXT: b .LBB0_5 +; LA32-JT-NEXT: .LBB0_4: # %bb4 +; LA32-JT-NEXT: ori $a0, $zero, 1 +; LA32-JT-NEXT: .LBB0_5: # %exit +; LA32-JT-NEXT: st.w $a0, $a1, 0 +; LA32-JT-NEXT: .LBB0_6: # %exit +; LA32-JT-NEXT: ret +; +; LA64-JT-LABEL: switch_4_arms: +; LA64-JT: # %bb.0: # %entry +; LA64-JT-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-JT-NEXT: addi.d $a2, $a0, -1 +; LA64-JT-NEXT: ori $a0, $zero, 3 +; LA64-JT-NEXT: bltu $a0, $a2, .LBB0_6 +; LA64-JT-NEXT: # %bb.1: # %entry +; LA64-JT-NEXT: slli.d $a2, $a2, 3 +; LA64-JT-NEXT: pcalau12i $a3, %pc_hi20(.LJTI0_0) +; LA64-JT-NEXT: addi.d $a3, $a3, %pc_lo12(.LJTI0_0) +; LA64-JT-NEXT: ldx.d $a2, $a2, $a3 +; LA64-JT-NEXT: jr $a2 +; LA64-JT-NEXT: .LBB0_2: # %bb1 +; LA64-JT-NEXT: ori $a0, $zero, 4 +; LA64-JT-NEXT: b .LBB0_5 +; LA64-JT-NEXT: .LBB0_3: # %bb3 +; LA64-JT-NEXT: ori $a0, $zero, 2 +; LA64-JT-NEXT: b .LBB0_5 +; LA64-JT-NEXT: .LBB0_4: # %bb4 +; LA64-JT-NEXT: ori $a0, $zero, 1 +; LA64-JT-NEXT: .LBB0_5: # %exit +; LA64-JT-NEXT: st.w $a0, $a1, 0 +; LA64-JT-NEXT: .LBB0_6: # %exit +; LA64-JT-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] +bb1: + store i32 4, ptr %out + br label %exit +bb2: + store i32 3, ptr %out + br label %exit +bb3: + store i32 2, ptr %out + br label %exit +bb4: + store i32 1, ptr %out + br label %exit +exit: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll 
b/llvm/test/CodeGen/LoongArch/ldptr.ll new file mode 100644 index 0000000000000000000000000000000000000000..81254164e7e516510bc9d6f6ed80f456b1cd3b99 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ldptr.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +;; Check that ldptr.w is not emitted for small offsets. +define signext i32 @ldptr_w_too_small_offset(ptr %p) nounwind { +; LA32-LABEL: ldptr_w_too_small_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a0, $a0, 2044 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_w_too_small_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.w $a0, $a0, 2044 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i32, ptr %p, i64 511 + %val = load i32, ptr %addr, align 4 + ret i32 %val +} + +;; Check that ldptr.w is emitted for applicable offsets. +define signext i32 @ldptr_w(ptr %p) nounwind { +; LA32-LABEL: ldptr_w: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a1, $zero, 2048 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_w: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ldptr.w $a0, $a0, 2048 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i32, ptr %p, i64 512 + %val = load i32, ptr %addr, align 4 + ret i32 %val +} + +;; Check that ldptr.w is not emitted for out-of-range offsets. 
+define signext i32 @ldptr_w_too_big_offset(ptr %p) nounwind { +; LA32-LABEL: ldptr_w_too_big_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: lu12i.w $a1, 8 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_w_too_big_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a1, 8 +; LA64-NEXT: ldx.w $a0, $a0, $a1 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i32, ptr %p, i64 8192 + %val = load i32, ptr %addr, align 4 + ret i32 %val +} + +;; Check that ldptr.d is not emitted for small offsets. +define i64 @ldptr_d_too_small_offset(ptr %p) nounwind { +; LA32-LABEL: ldptr_d_too_small_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 2040 +; LA32-NEXT: ld.w $a1, $a0, 2044 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_d_too_small_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 2040 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i64, ptr %p, i64 255 + %val = load i64, ptr %addr, align 8 + ret i64 %val +} + +;; Check that ldptr.d is emitted for applicable offsets. +define i64 @ldptr_d(ptr %p) nounwind { +; LA32-LABEL: ldptr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a1, $zero, 2052 +; LA32-NEXT: add.w $a1, $a0, $a1 +; LA32-NEXT: ori $a2, $zero, 2048 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ldptr.d $a0, $a0, 2048 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i64, ptr %p, i64 256 + %val = load i64, ptr %addr, align 8 + ret i64 %val +} + +;; Check that ldptr.d is not emitted for out-of-range offsets. 
+define i64 @ldptr_d_too_big_offset(ptr %p) nounwind { +; LA32-LABEL: ldptr_d_too_big_offset: +; LA32: # %bb.0: # %entry +; LA32-NEXT: lu12i.w $a1, 8 +; LA32-NEXT: ori $a2, $a1, 4 +; LA32-NEXT: add.w $a2, $a0, $a2 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ld.w $a1, $a2, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: ldptr_d_too_big_offset: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a1, 8 +; LA64-NEXT: ldx.d $a0, $a0, $a1 +; LA64-NEXT: ret +entry: + %addr = getelementptr inbounds i64, ptr %p, i64 4096 + %val = load i64, ptr %addr, align 8 + ret i64 %val +} diff --git a/llvm/test/CodeGen/LoongArch/ldx-stx-sp-1.ll b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-1.ll new file mode 100644 index 0000000000000000000000000000000000000000..3b8fb592e49cb2070ef07f594832bf3e75d42001 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-1.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+f < %s | FileCheck %s + +;; This should not crash the code generator, but the indexed loads/stores +;; should still be present (the important part is that [f]{ld,st}x shouldn't +;; take an $sp argument). 
+ +define i8 @test_load_i(i64 %i) { +; CHECK-LABEL: test_load_i: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: addi.d $a1, $sp, 8 +; CHECK-NEXT: ldx.b $a0, $a0, $a1 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = alloca ptr + %2 = getelementptr inbounds i8, ptr %1, i64 %i + %3 = load i8, ptr %2 + ret i8 %3 +} + +define float @test_load_f(i64 %i) { +; CHECK-LABEL: test_load_f: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: slli.d $a0, $a0, 2 +; CHECK-NEXT: addi.d $a1, $sp, 8 +; CHECK-NEXT: fldx.s $fa0, $a0, $a1 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = alloca ptr + %2 = getelementptr inbounds float, ptr %1, i64 %i + %3 = load float, ptr %2 + ret float %3 +} + +define void @test_store_i(i64 %i, i8 %v) { +; CHECK-LABEL: test_store_i: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: addi.d $a2, $sp, 8 +; CHECK-NEXT: stx.b $a1, $a0, $a2 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = alloca ptr + %2 = getelementptr inbounds i8, ptr %1, i64 %i + store i8 %v, ptr %2, align 1 + ret void +} + +define void @test_store_f(i64 %i, float %v) { +; CHECK-LABEL: test_store_f: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: slli.d $a0, $a0, 2 +; CHECK-NEXT: addi.d $a1, $sp, 8 +; CHECK-NEXT: fstx.s $fa0, $a0, $a1 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = alloca ptr + %2 = getelementptr inbounds float, ptr %1, i64 %i + store float %v, ptr %2, align 4 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ldx-stx-sp-2.ll b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-2.ll new file mode 100644 index 0000000000000000000000000000000000000000..be125f25ab2beb04b4d7d5043adbf9a388674d23 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-2.ll @@ -0,0 +1,20 @@ +; RUN: llc 
--mtriple=loongarch32 < %s +; RUN: llc --mtriple=loongarch64 < %s + +;; This should not crash the code generator. + +@.str.2 = external dso_local unnamed_addr constant [69 x i8], align 1 + +define dso_local void @main() { +entry: + %n0 = alloca [2 x [3 x i32]], align 4 + %0 = load i32, ptr poison, align 4 + %idxprom15 = sext i32 %0 to i64 + %arrayidx16 = getelementptr inbounds [2 x [3 x i32]], ptr %n0, i64 0, i64 %idxprom15 + %arrayidx17 = getelementptr inbounds [3 x i32], ptr %arrayidx16, i64 0, i64 0 + %1 = load i32, ptr %arrayidx17, align 4 + call void (ptr, ...) @printf(ptr noundef @.str.2, i32 noundef signext %1) + ret void +} + +declare void @printf(ptr, ...) diff --git a/llvm/test/CodeGen/LoongArch/ldx-stx-sp-3.ll b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-3.ll new file mode 100644 index 0000000000000000000000000000000000000000..45d2450bd64c1d412ad3427339de197b996fdf2c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ldx-stx-sp-3.ll @@ -0,0 +1,23 @@ +; RUN: llc --mtriple=loongarch32 < %s +; RUN: llc --mtriple=loongarch64 < %s + +;; This should not crash the code generator. 
+ +define void @_ZN12_GLOBAL__N_111DumpVisitorclIN4llvm16itanium_demangle8FoldExprEEEvPKT_() { +entry: + %ref.tmp6.i.i = alloca [4 x i8], align 1 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.body.i.i, %entry + %__begin0.0.add.i.i = add nuw nsw i64 poison, 1 + br label %for.body.i.i + +for.body.i.i: ; preds = %for.cond.i.i + %__begin0.0.ptr.i.i = getelementptr inbounds i8, ptr %ref.tmp6.i.i, i64 %__begin0.0.add.i.i + %0 = load i8, ptr %__begin0.0.ptr.i.i, align 1 + %tobool18.not.i.i = icmp eq i8 %0, 0 + br i1 %tobool18.not.i.i, label %for.cond.i.i, label %exit + +exit: ; preds = %for.body.i.i + unreachable +} diff --git a/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4a9189c97e417eed2a08b7db74681f7a9a620d4f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/mafft-Lalignmm.ll @@ -0,0 +1,127 @@ +; RUN: llc --mtriple=loongarch64 -mattr=+d %s -o /dev/null + +; ModuleID = 'bugpoint-reduced-simplifycfg.bc' +source_filename = "test-suite-src/MultiSource/Benchmarks/mafft/Lalignmm.c" + +define float @Lalignmm_hmout(ptr %seq1, ptr %eff1, i32 %icyc) { +entry: + %call4 = tail call i64 @strlen(ptr dereferenceable(1) poison) + %conv5 = trunc i64 %call4 to i32 + %call7 = tail call i64 @strlen(ptr dereferenceable(1) poison) + %call20 = tail call ptr @AllocateFloatVec(i32 signext poison) + %call22 = tail call ptr @AllocateFloatVec(i32 signext poison) + tail call void @st_OpeningGapCount(ptr poison, i32 signext %icyc, ptr %seq1, ptr %eff1, i32 signext %conv5) + %sub110 = add nsw i32 %conv5, -1 + %sub111 = add nsw i32 0, -1 + br i1 poison, label %for.cond.preheader.i, label %if.end.i + +for.cond.preheader.i: ; preds = %entry + %sext294 = shl i64 %call4, 32 + %conv23.i = ashr exact i64 %sext294, 32 + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %for.cond.preheader.i + %call.i = tail call ptr @strncpy(ptr poison, ptr poison, i64 %conv23.i) + br 
label %for.body.i + +if.end.i: ; preds = %entry + %call82.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call84.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call86.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call88.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call90.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call92.i = tail call ptr @AllocateIntVec(i32 signext poison) + %call94.i = tail call ptr @AllocateIntVec(i32 signext poison) + %call104.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call108.i = tail call ptr @AllocateFloatVec(i32 signext poison) + %call110.i = tail call ptr @AllocateIntVec(i32 signext poison) + %idxprom220.i = sext i32 %sub111 to i64 + %mpjpt.018.i = getelementptr inbounds i32, ptr %call110.i, i64 1 + %arrayidx329.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom220.i + %idxprom332.i = and i64 %call7, 4294967295 + %wide.trip.count130.i = zext i32 poison to i64 + %0 = add nsw i64 1, -1 + %arrayidx239.i = getelementptr inbounds float, ptr %call104.i, i64 1 + %1 = load float, ptr %arrayidx239.i, align 4 + store float %1, ptr %call84.i, align 4 + %curpt.017.i = getelementptr inbounds float, ptr %call84.i, i64 1 + %arrayidx279.i = getelementptr inbounds float, ptr %call20, i64 %0 + %2 = load ptr, ptr poison, align 8 + %3 = load ptr, ptr null, align 8 + %4 = trunc i64 %0 to i32 + br label %for.body260.us.i + +for.body260.us.i: ; preds = %if.end292.us.i, %if.end.i + %indvars.iv132.i = phi i64 [ %indvars.iv.next133.i, %if.end292.us.i ], [ 1, %if.end.i ] + %mpjpt.026.us.i = phi ptr [ poison, %if.end292.us.i ], [ %mpjpt.018.i, %if.end.i ] + %curpt.025.us.i = phi ptr [ %curpt.0.us.i, %if.end292.us.i ], [ %curpt.017.i, %if.end.i ] + %prept.022.us.i = phi ptr [ %incdec.ptr316.us.i, %if.end292.us.i ], [ %call82.i, %if.end.i ] + %mi.021.us.i = phi float [ %mi.1.us.i, %if.end292.us.i ], [ poison, %if.end.i ] + %5 = load float, ptr %prept.022.us.i, align 4 + %6 = 
add nsw i64 %indvars.iv132.i, -1 + %arrayidx263.us.i = getelementptr inbounds float, ptr %call22, i64 %6 + %7 = load float, ptr %arrayidx263.us.i, align 4 + %add264.us.i = fadd float %mi.021.us.i, %7 + %cmp265.us.i = fcmp ogt float %add264.us.i, %5 + %wm.0.us.i = select i1 %cmp265.us.i, float %add264.us.i, float %5 + %arrayidx270.us.i = getelementptr inbounds float, ptr poison, i64 %indvars.iv132.i + %cmp272.us.i = fcmp ult float 0.000000e+00, %mi.021.us.i + %mi.1.us.i = select i1 %cmp272.us.i, float %mi.021.us.i, float 0.000000e+00 + %8 = trunc i64 %6 to i32 + %mpi.1.us.i = select i1 %cmp272.us.i, i32 0, i32 %8 + %9 = load float, ptr %arrayidx279.i, align 4 + %add280.us.i = fadd float 0.000000e+00, %9 + %cmp281.us.i = fcmp ogt float %add280.us.i, %wm.0.us.i + %wm.1.us.i = select i1 %cmp281.us.i, float %add280.us.i, float %wm.0.us.i + %cmp288.us.i = fcmp ult float poison, 0.000000e+00 + br i1 %cmp288.us.i, label %if.end292.us.i, label %if.then290.us.i + +if.then290.us.i: ; preds = %for.body260.us.i + store i32 %4, ptr %mpjpt.026.us.i, align 4 + br label %if.end292.us.i + +if.end292.us.i: ; preds = %if.then290.us.i, %for.body260.us.i + %10 = phi i32 [ %4, %if.then290.us.i ], [ poison, %for.body260.us.i ] + %add293.us.i = fadd float %wm.1.us.i, 0.000000e+00 + %arrayidx297.us.i = getelementptr inbounds float, ptr %2, i64 %indvars.iv132.i + store float %add293.us.i, ptr %arrayidx297.us.i, align 4 + %arrayidx306.us.i = getelementptr inbounds i32, ptr %call94.i, i64 %indvars.iv132.i + store i32 %10, ptr %arrayidx306.us.i, align 4 + %arrayidx308.us.i = getelementptr inbounds i32, ptr %call92.i, i64 %indvars.iv132.i + store i32 %mpi.1.us.i, ptr %arrayidx308.us.i, align 4 + %11 = load float, ptr %curpt.025.us.i, align 4 + %arrayidx310.us.i = getelementptr inbounds float, ptr %call86.i, i64 %indvars.iv132.i + store float %11, ptr %arrayidx310.us.i, align 4 + %arrayidx312.us.i = getelementptr inbounds float, ptr %call90.i, i64 %indvars.iv132.i + store float 0.000000e+00, ptr 
%arrayidx312.us.i, align 4 + %arrayidx314.us.i = getelementptr inbounds float, ptr %call88.i, i64 %indvars.iv132.i + store float %mi.1.us.i, ptr %arrayidx314.us.i, align 4 + %incdec.ptr316.us.i = getelementptr inbounds float, ptr %prept.022.us.i, i64 1 + %indvars.iv.next133.i = add nuw nsw i64 %indvars.iv132.i, 1 + %curpt.0.us.i = getelementptr inbounds float, ptr %curpt.025.us.i, i64 1 + %exitcond137.not.i = icmp eq i64 %indvars.iv.next133.i, %wide.trip.count130.i + br i1 %exitcond137.not.i, label %for.end321.i, label %for.body260.us.i + +for.end321.i: ; preds = %if.end292.us.i + %12 = load float, ptr %arrayidx329.i, align 4 + %arrayidx333.i = getelementptr inbounds float, ptr %3, i64 %idxprom332.i + store float %12, ptr %arrayidx333.i, align 4 + tail call fastcc void @match_calc(ptr %call104.i, ptr poison, ptr poison, i32 signext %sub111, i32 signext %conv5, ptr poison, ptr poison, i32 signext 1) + br label %for.body429.i + +for.body429.i: ; preds = %for.body429.i, %for.end321.i + %j.743.i = phi i32 [ %sub111, %for.end321.i ], [ %sub436.i, %for.body429.i ] + %sub436.i = add nsw i32 %j.743.i, -1 + %idxprom437.i = zext i32 %sub436.i to i64 + %arrayidx438.i = getelementptr inbounds float, ptr %call108.i, i64 %idxprom437.i + store float 0.000000e+00, ptr %arrayidx438.i, align 4 + store i32 %sub110, ptr poison, align 4 + br label %for.body429.i +} + +declare i64 @strlen(ptr) +declare ptr @AllocateFloatVec(i32) +declare void @st_OpeningGapCount(ptr, i32, ptr, ptr, i32) +declare ptr @strncpy(ptr, ptr, i64) +declare ptr @AllocateIntVec(i32) +declare void @match_calc(ptr, ptr, ptr, i32, i32, ptr, ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..4d4f376cd5385bc775a089222645045e4406780a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
--mtriple=loongarch64 < %s | FileCheck %s + +;; Before getSelectionDAGInfo() interface hooks were defined DAGBuilder +;; would crash. + +define signext i32 @test1(ptr %buffer1, ptr %buffer2) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ori $a2, $zero, 16 +; CHECK-NEXT: bl %plt(memcmp) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) + ret i32 %call +} + +declare signext i32 @memcmp(ptr, ptr, i64) diff --git a/llvm/test/CodeGen/LoongArch/mir-target-flags.ll b/llvm/test/CodeGen/LoongArch/mir-target-flags.ll new file mode 100644 index 0000000000000000000000000000000000000000..9f3a061fe7244a88603d17ec4d4447a0b08adb34 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/mir-target-flags.ll @@ -0,0 +1,44 @@ +; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \ +; RUN: --relocation-model=pic %s -o %t.mir +; RUN: llc --mtriple=loongarch64 --run-pass loongarch-prera-expand-pseudo \ +; RUN: %t.mir -o - | FileCheck %s + +;; This tests the LoongArch-specific serialization and deserialization of +;; `target-flags(...)` + +@g_e = external global i32 +@g_i = internal global i32 0 +@t_un = external thread_local global i32 +@t_ld = external thread_local(localdynamic) global i32 +@t_ie = external thread_local(initialexec) global i32 +@t_le = external thread_local(localexec) global i32 + +declare void @callee1() nounwind +declare dso_local void @callee2() nounwind + +define void @caller() nounwind { +; CHECK-LABEL: name: caller +; CHECK: target-flags(loongarch-got-pc-hi) @g_e +; CHECK-NEXT: target-flags(loongarch-got-pc-lo) @g_e +; CHECK: target-flags(loongarch-pcrel-hi) @g_i +; CHECK-NEXT: target-flags(loongarch-pcrel-lo) @g_i +; CHECK: 
target-flags(loongarch-gd-pc-hi) @t_un +; CHECK-NEXT: target-flags(loongarch-got-pc-lo) @t_un +; CHECK: target-flags(loongarch-ld-pc-hi) @t_ld +; CHECK-NEXT: target-flags(loongarch-got-pc-lo) @t_ld +; CHECK: target-flags(loongarch-ie-pc-hi) @t_ie +; CHECK-NEXT: target-flags(loongarch-ie-pc-lo) @t_ie +; CHECK: target-flags(loongarch-le-hi) @t_le +; CHECK-NEXT: target-flags(loongarch-le-lo) @t_le +; CHECK: target-flags(loongarch-call-plt) @callee1 +; CHECK: target-flags(loongarch-call) @callee2 + %a = load volatile i32, ptr @g_e + %b = load volatile i32, ptr @g_i + %c = load volatile i32, ptr @t_un + %d = load volatile i32, ptr @t_ld + %e = load volatile i32, ptr @t_ie + %f = load volatile i32, ptr @t_le + call i32 @callee1() + call i32 @callee2() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/misc.mir b/llvm/test/CodeGen/LoongArch/misc.mir deleted file mode 100644 index 56793c583904e09bc27b7a7b9d9dc35d4e54531f..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/LoongArch/misc.mir +++ /dev/null @@ -1,200 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: I15 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------------------------------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------------------------------------- -# opcode | imm15 -# ---------------------------------------------------+--------------------------------------------- - 
---- -# CHECK-LABEL: test_DBAR: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: dbar 0 -name: test_DBAR -body: | - bb.0: - DBAR 0 -... ---- -# CHECK-LABEL: test_IBAR: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: ibar 0 -name: test_IBAR -body: | - bb.0: - IBAR 0 -... ---- -# CHECK-LABEL: test_SYSCALL: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 -# CHECK-ASM: syscall 100 -name: test_SYSCALL -body: | - bb.0: - SYSCALL 100 -... ---- -# CHECK-LABEL: test_BREAK: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 -# CHECK-ASM: break 199 -name: test_BREAK -body: | - bb.0: - BREAK 199 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: I26 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+------------------------------ -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+------------------------------ -# opcode | imm26{15-0} | imm26{25-16} -# ------------------+-----------------------------------------------+------------------------------ - ---- -# CHECK-LABEL: test_B: -# CHECK-ENC: 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: b 80 -name: test_B -body: | - bb.0: - B 80 -... ---- -# CHECK-LABEL: test_BL: -# CHECK-ENC: 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: bl 136 -name: test_BL -body: | - bb.0: - BL 136 -... 
- -# -------------------------------------------------------------------------------------------------------- -# Encoding format: BSTR_W -# -------------------------------------------------------------------------------------------------------- -# ---------------------------------+--------------+---------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------+--------------+---------+--------------+--------------+--------------- -# opcode{11-1} | msb |opcode{0}| lsb | rj | rd -# ---------------------------------+--------------+---------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BSTRINS_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrins.w $a0, $a1, 7, 2 -name: test_BSTRINS_W -body: | - bb.0: - $r4 = BSTRINS_W $r4, $r5, 7, 2 -... ---- -# CHECK-LABEL: test_BSTRPICK_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrpick.w $a0, $a1, 10, 4 -name: test_BSTRPICK_W -body: | - bb.0: - $r4 = BSTRPICK_W $r5, 10, 4 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: BSTR_D -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------+-----------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------+-----------------+--------------+--------------- -# opcode | msb | lsb | rj | rd -# ------------------------------+-----------------+-----------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BSTRINS_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrins.d $a0, $a1, 7, 2 -name: test_BSTRINS_D -body: | - bb.0: - $r4 = BSTRINS_D $r4, $r5, 7, 2 -... ---- -# CHECK-LABEL: test_BSTRPICK_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrpick.d $a0, $a1, 39, 22 -name: test_BSTRPICK_D -body: | - bb.0: - $r4 = BSTRPICK_D $r5, 39, 22 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: ASRT -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | 0x0 -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ASRTLE_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: asrtle.d $a0, $a1 -name: test_ASRTLE_D -body: | - bb.0: - ASRTLE_D $r4, $r5 -... 
---- -# CHECK-LABEL: test_ASRTGT_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: asrtgt.d $a0, $a1 -name: test_ASRTGT_D -body: | - bb.0: - ASRTGT_D $r4, $r5 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: PRELD -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------------------------+--------------+--------------- -# opcode | imm12 | rj | imm5 -# ------------------------------+-----------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_PRELD: -# CHECK-ENC: 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 1 1 -# CHECK-ASM: preld 15, $a0, 21 -name: test_PRELD -body: | - bb.0: - PRELD 15, $r4, 21 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: PRELDX -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | imm5 -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_PRELDX: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 1 -# CHECK-ASM: preldx 11, $a0, $a1 -name: test_PRELDX -body: | - bb.0: - PRELDX 11, $r4, $r5 diff --git a/llvm/test/CodeGen/LoongArch/nomerge.ll b/llvm/test/CodeGen/LoongArch/nomerge.ll new file mode 100644 index 0000000000000000000000000000000000000000..e4aecd79993eacca8fa2e63261bb6052472f7505 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/nomerge.ll @@ -0,0 +1,35 @@ +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: # %bb.1: # %entry +; CHECK: # %bb.2: # %if.then +; CHECK-NEXT: bl %plt(bar) +; CHECK: .LBB0_3: # %if.then2 +; CHECK-NEXT: bl %plt(bar) +; CHECK: .LBB0_4: # %if.end3 +; CHECK: b %plt(bar) diff --git a/llvm/test/CodeGen/LoongArch/not.ll b/llvm/test/CodeGen/LoongArch/not.ll new file mode 100644 index 0000000000000000000000000000000000000000..b9e02bdf111d9c544658833f94c71a2015e08905 --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/not.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define i8 @nor_i8(i8 %a, i8 %b) nounwind { +; LA32-LABEL: nor_i8: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: nor_i8: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $a1 +; LA64-NEXT: ret + %or = or i8 %a, %b + %neg = xor i8 %or, -1 + ret i8 %neg +} + +define i16 @nor_i16(i16 %a, i16 %b) nounwind { +; LA32-LABEL: nor_i16: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: nor_i16: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $a1 +; LA64-NEXT: ret + %or = or i16 %a, %b + %neg = xor i16 %or, -1 + ret i16 %neg +} + +define i32 @nor_i32(i32 %a, i32 %b) nounwind { +; LA32-LABEL: nor_i32: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: nor_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $a1 +; LA64-NEXT: ret + %or = or i32 %a, %b + %neg = xor i32 %or, -1 + ret i32 %neg +} + +define i64 @nor_i64(i64 %a, i64 %b) nounwind { +; LA32-LABEL: nor_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $a2 +; LA32-NEXT: nor $a1, $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: nor_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $a1 +; LA64-NEXT: ret + %or = or i64 %a, %b + %neg = xor i64 %or, -1 + ret i64 %neg +} + +define i8 @nor_zero_i8(i8 %a) nounwind { +; LA32-LABEL: nor_zero_i8: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: nor_zero_i8: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: ret + %neg = xor i8 %a, -1 + ret i8 %neg +} + +define i16 @nor_zero_i16(i16 %a) nounwind { +; LA32-LABEL: nor_zero_i16: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: nor_zero_i16: +; LA64: # 
%bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: ret + %neg = xor i16 %a, -1 + ret i16 %neg +} + +define i32 @nor_zero_i32(i32 %a) nounwind { +; LA32-LABEL: nor_zero_i32: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: nor_zero_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: ret + %neg = xor i32 %a, -1 + ret i32 %neg +} + +define i64 @nor_zero_i64(i64 %a) nounwind { +; LA32-LABEL: nor_zero_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: nor $a1, $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: nor_zero_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: ret + %neg = xor i64 %a, -1 + ret i64 %neg +} + +define i8 @orn_i8(i8 %a, i8 %b) nounwind { +; LA32-LABEL: orn_i8: +; LA32: # %bb.0: +; LA32-NEXT: orn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: orn_i8: +; LA64: # %bb.0: +; LA64-NEXT: orn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i8 %b, -1 + %or = or i8 %neg, %a + ret i8 %or +} + +define i16 @orn_i16(i16 %a, i16 %b) nounwind { +; LA32-LABEL: orn_i16: +; LA32: # %bb.0: +; LA32-NEXT: orn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: orn_i16: +; LA64: # %bb.0: +; LA64-NEXT: orn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i16 %b, -1 + %or = or i16 %neg, %a + ret i16 %or +} + +define i32 @orn_i32(i32 %a, i32 %b) nounwind { +; LA32-LABEL: orn_i32: +; LA32: # %bb.0: +; LA32-NEXT: orn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: orn_i32: +; LA64: # %bb.0: +; LA64-NEXT: orn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i64 @orn_i64(i64 %a, i64 %b) nounwind { +; LA32-LABEL: orn_i64: +; LA32: # %bb.0: +; LA32-NEXT: orn $a0, $a0, $a2 +; LA32-NEXT: orn $a1, $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: orn_i64: +; LA64: # %bb.0: +; LA64-NEXT: orn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i64 %b, -1 + %or = or i64 %neg, %a + ret i64 %or +} + +define i8 @andn_i8(i8 %a, i8 %b) nounwind { +; 
LA32-LABEL: andn_i8: +; LA32: # %bb.0: +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: andn_i8: +; LA64: # %bb.0: +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i8 %b, -1 + %and = and i8 %neg, %a + ret i8 %and +} + +define i16 @andn_i16(i16 %a, i16 %b) nounwind { +; LA32-LABEL: andn_i16: +; LA32: # %bb.0: +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: andn_i16: +; LA64: # %bb.0: +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i16 %b, -1 + %and = and i16 %neg, %a + ret i16 %and +} + +define i32 @andn_i32(i32 %a, i32 %b) nounwind { +; LA32-LABEL: andn_i32: +; LA32: # %bb.0: +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: andn_i32: +; LA64: # %bb.0: +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} + +define i64 @andn_i64(i64 %a, i64 %b) nounwind { +; LA32-LABEL: andn_i64: +; LA32: # %bb.0: +; LA32-NEXT: andn $a0, $a0, $a2 +; LA32-NEXT: andn $a1, $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: andn_i64: +; LA64: # %bb.0: +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %neg = xor i64 %b, -1 + %and = and i64 %neg, %a + ret i64 %and +} diff --git a/llvm/test/CodeGen/LoongArch/numeric-reg-names.ll b/llvm/test/CodeGen/LoongArch/numeric-reg-names.ll new file mode 100644 index 0000000000000000000000000000000000000000..153a697a55b9a97c7845217e14a0429f9e36a2d4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/numeric-reg-names.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --loongarch-numeric-reg < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --loongarch-numeric-reg < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +@.str_1 = internal constant [7 x i8] c"hello\0A\00" + +declare i32 @printf(ptr, ...) 
+ +define i32 @main() { +; LA32-LABEL: main: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $r3, $r3, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $r1, $r3, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: pcalau12i $r4, %pc_hi20(.str_1) +; LA32-NEXT: addi.w $r4, $r4, %pc_lo12(.str_1) +; LA32-NEXT: bl %plt(printf) +; LA32-NEXT: move $r4, $r0 +; LA32-NEXT: ld.w $r1, $r3, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $r3, $r3, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: main: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $r3, $r3, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $r1, $r3, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: pcalau12i $r4, %pc_hi20(.str_1) +; LA64-NEXT: addi.d $r4, $r4, %pc_lo12(.str_1) +; LA64-NEXT: bl %plt(printf) +; LA64-NEXT: move $r4, $r0 +; LA64-NEXT: ld.d $r1, $r3, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $r3, $r3, 16 +; LA64-NEXT: ret + %s = getelementptr [7 x i8], ptr @.str_1, i64 0, i64 0 + call i32 (ptr, ...) @printf(ptr %s) + ret i32 0 +} diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll new file mode 100644 index 0000000000000000000000000000000000000000..f4a01a52b544adeaf420af51064b864e54595391 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -0,0 +1,162 @@ +;; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each +;; pass. Ignore it with 'grep -v'. 
+; RUN: llc --mtriple=loongarch32 -O1 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch32 -O2 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch32 -O3 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch64 -O1 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch64 -O2 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc --mtriple=loongarch64 -O3 --debug-pass=Structure %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information +; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Type-Based Alias Analysis +; CHECK-NEXT: Scoped NoAlias Alias Analysis +; CHECK-NEXT: Assumption Cache Tracker +; CHECK-NEXT: Profile summary info +; CHECK-NEXT: Create Garbage Collector Module Metadata +; CHECK-NEXT: Machine Branch Probability Analysis +; CHECK-NEXT: Default Regalloc Eviction Advisor +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Pre-ISel Intrinsic Lowering +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Expand Atomic instructions +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Canonicalize Freeze Instructions in Loops +; CHECK-NEXT: Induction Variable Users +; CHECK-NEXT: Loop 
Strength Reduction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Merge contiguous icmps into a memcmp +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Expand memcmp() to load/stores +; CHECK-NEXT: Lower Garbage Collection Instructions +; CHECK-NEXT: Shadow Stack GC Lowering +; CHECK-NEXT: Lower constant intrinsics +; CHECK-NEXT: Remove unreachable blocks from the CFG +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis +; CHECK-NEXT: Constant Hoisting +; CHECK-NEXT: Replace intrinsics with calls to vector library +; CHECK-NEXT: Partially inline calls to library functions +; CHECK-NEXT: Expand vector predication intrinsics +; CHECK-NEXT: Scalarize Masked Memory Intrinsics +; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: CodeGen Prepare +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Safe Stack instrumentation pass +; CHECK-NEXT: Insert stack protectors +; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: LoongArch DAG->DAG Pattern Instruction Selection +; CHECK-NEXT: Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Early Tail Duplication +; CHECK-NEXT: Optimize machine instruction PHIs +; CHECK-NEXT: Slot index numbering +; CHECK-NEXT: Merge disjoint stack slots +; 
CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: Early Machine Loop Invariant Code Motion +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: Machine Common Subexpression Elimination +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis +; CHECK-NEXT: Machine code sinking +; CHECK-NEXT: Peephole Optimizations +; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: LoongArch Pre-RA pseudo instruction expansion pass +; CHECK-NEXT: Detect Dead Lanes +; CHECK-NEXT: Process Implicit Definitions +; CHECK-NEXT: Remove unreachable machine basic blocks +; CHECK-NEXT: Live Variable Analysis +; CHECK-NEXT: Eliminate PHI nodes for register allocation +; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Slot index numbering +; CHECK-NEXT: Live Interval Analysis +; CHECK-NEXT: Simple Register Coalescing +; CHECK-NEXT: Rename Disconnected Subregister Components +; CHECK-NEXT: Machine Instruction Scheduler +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: Debug Variable Analysis +; CHECK-NEXT: Live Stack Slot Analysis +; CHECK-NEXT: Virtual Register Map +; CHECK-NEXT: Live Register Matrix +; CHECK-NEXT: Bundle Machine CFG Edges +; CHECK-NEXT: Spill Code Placement Analysis +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Greedy Register Allocator +; CHECK-NEXT: Virtual Register Rewriter +; CHECK-NEXT: Register Allocation Pass Scoring +; CHECK-NEXT: Stack Slot Coloring +; CHECK-NEXT: Machine Copy Propagation Pass +; CHECK-NEXT: Machine Loop Invariant Code Motion +; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis +; CHECK-NEXT: Fixup 
Statepoint Caller Saved +; CHECK-NEXT: PostRA Machine Sink +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: Shrink Wrapping analysis +; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; CHECK-NEXT: Control Flow Optimizer +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Tail Duplication +; CHECK-NEXT: Machine Copy Propagation Pass +; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Post RA top-down list latency scheduler +; CHECK-NEXT: Analyze Machine Code For Garbage Collection +; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Branch Probability Basic Block Placement +; CHECK-NEXT: Insert fentry calls +; CHECK-NEXT: Insert XRay ops +; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: Contiguously Lay Out Funclets +; CHECK-NEXT: StackMap Liveness Analysis +; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: LoongArch Assembly Printer +; CHECK-NEXT: Free MachineFunction diff --git a/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll new file mode 100644 index 0000000000000000000000000000000000000000..12d4bfb50a198985d60b15e1d3b03273cf29e226 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll @@ -0,0 +1,63 @@ +;; Test the function attribute "patchable-function-entry". +;; Adapted from the RISCV test case. 
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define void @f0() "patchable-function-entry"="0" { +; CHECK-LABEL: f0: +; CHECK-NEXT: .Lfunc_begin0: +; CHECK-NOT: nop +; CHECK: ret +; CHECK-NOT: .section __patchable_function_entries + ret void +} + +define void @f1() "patchable-function-entry"="1" { +; CHECK-LABEL: f1: +; CHECK-NEXT: .Lfunc_begin1: +; CHECK: nop +; CHECK-NEXT: ret +; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}} +; LA32: .p2align 2 +; LA32-NEXT: .word .Lfunc_begin1 +; LA64: .p2align 3 +; LA64-NEXT: .dword .Lfunc_begin1 + ret void +} + +$f5 = comdat any +define void @f5() "patchable-function-entry"="5" comdat { +; CHECK-LABEL: f5: +; CHECK-NEXT: .Lfunc_begin2: +; CHECK-COUNT-5: nop +; CHECK-NEXT: ret +; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}} +; LA32: .p2align 2 +; LA32-NEXT: .word .Lfunc_begin2 +; LA64: .p2align 3 +; LA64-NEXT: .dword .Lfunc_begin2 + ret void +} + +;; -fpatchable-function-entry=3,2 +;; "patchable-function-prefix" emits data before the function entry label. +define void @f3_2() "patchable-function-entry"="1" "patchable-function-prefix"="2" { +; CHECK-LABEL: .type f3_2,@function +; CHECK-NEXT: .Ltmp0: # @f3_2 +; CHECK-COUNT-2: nop +; CHECK-NEXT: f3_2: +; CHECK: # %bb.0: +; CHECK-NEXT: nop +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA64-NEXT: addi.d $sp, $sp, -16 +;; .size does not include the prefix. 
+; CHECK: .Lfunc_end3: +; CHECK-NEXT: .size f3_2, .Lfunc_end3-f3_2 +; CHECK: .section __patchable_function_entries,"awo",@progbits,f3_2{{$}} +; LA32: .p2align 2 +; LA32-NEXT: .word .Ltmp0 +; LA64: .p2align 3 +; LA64-NEXT: .dword .Ltmp0 + %frame = alloca i8, i32 16 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/returnaddr-error.ll b/llvm/test/CodeGen/LoongArch/returnaddr-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..6ac1e0afcd5c0bbf985905821f01ca511bdab996 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/returnaddr-error.ll @@ -0,0 +1,9 @@ +; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +declare ptr @llvm.returnaddress(i32 immarg) + +define ptr @non_zero_returnaddress() nounwind { +; CHECK: return address can only be determined for the current frame + %1 = call ptr @llvm.returnaddress(i32 1) + ret ptr %1 +} diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll new file mode 100644 index 0000000000000000000000000000000000000000..f54a47ae847af54b46f1fc65952a438a075d4714 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll @@ -0,0 +1,629 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +;; TODO: Add optimization to ISD::ROTL + +define i32 @rotl_32(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotl_32: +; LA32: # %bb.0: +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: sub.w $a1, $a2, $a1 +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_32: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: sll.w $a1, $a0, $a1 +; LA64-NEXT: srl.w $a0, $a0, $a2 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: ret + %z = sub i32 32, %y + %b = shl i32 %x, %y + %c = lshr i32 %x, %z + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotr_32(i32 %x, i32 %y) nounwind { +; 
LA32-LABEL: rotr_32: +; LA32: # %bb.0: +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_32: +; LA64: # %bb.0: +; LA64-NEXT: rotr.w $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i32 32, %y + %b = lshr i32 %x, %y + %c = shl i32 %x, %z + %d = or i32 %b, %c + ret i32 %d +} + +define i64 @rotl_64(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotl_64: +; LA32: # %bb.0: +; LA32-NEXT: xori $a3, $a2, 31 +; LA32-NEXT: srli.w $a4, $a0, 1 +; LA32-NEXT: srl.w $a3, $a4, $a3 +; LA32-NEXT: sll.w $a4, $a1, $a2 +; LA32-NEXT: or $a3, $a4, $a3 +; LA32-NEXT: addi.w $a4, $a2, -32 +; LA32-NEXT: slti $a5, $a4, 0 +; LA32-NEXT: maskeqz $a3, $a3, $a5 +; LA32-NEXT: sll.w $a6, $a0, $a4 +; LA32-NEXT: masknez $a5, $a6, $a5 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: ori $a5, $zero, 64 +; LA32-NEXT: sub.w $a5, $a5, $a2 +; LA32-NEXT: xori $a5, $a5, 31 +; LA32-NEXT: slli.w $a6, $a1, 1 +; LA32-NEXT: sll.w $a5, $a6, $a5 +; LA32-NEXT: sub.w $a6, $zero, $a2 +; LA32-NEXT: srl.w $a7, $a1, $a6 +; LA32-NEXT: ori $a1, $zero, 32 +; LA32-NEXT: sub.w $t0, $a1, $a2 +; LA32-NEXT: srai.w $a1, $t0, 31 +; LA32-NEXT: and $a1, $a1, $a7 +; LA32-NEXT: or $a1, $a3, $a1 +; LA32-NEXT: srl.w $a3, $a0, $a6 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: slti $a5, $t0, 0 +; LA32-NEXT: masknez $a6, $a7, $a5 +; LA32-NEXT: maskeqz $a3, $a3, $a5 +; LA32-NEXT: or $a3, $a3, $a6 +; LA32-NEXT: sll.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $a4, 31 +; LA32-NEXT: and $a0, $a2, $a0 +; LA32-NEXT: or $a0, $a0, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_64: +; LA64: # %bb.0: +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: sub.d $a1, $a2, $a1 +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i64 64, %y + %b = shl i64 %x, %y + %c = lshr i64 %x, %z + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotr_64(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotr_64: +; LA32: # %bb.0: +; LA32-NEXT: xori $a3, $a2, 31 +; LA32-NEXT: slli.w $a4, $a1, 1 +; LA32-NEXT: sll.w $a3, $a4, $a3 +; LA32-NEXT: srl.w $a4, $a0, $a2 +; 
LA32-NEXT: or $a3, $a4, $a3 +; LA32-NEXT: addi.w $a4, $a2, -32 +; LA32-NEXT: slti $a5, $a4, 0 +; LA32-NEXT: maskeqz $a3, $a3, $a5 +; LA32-NEXT: srl.w $a6, $a1, $a4 +; LA32-NEXT: masknez $a5, $a6, $a5 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: ori $a5, $zero, 64 +; LA32-NEXT: sub.w $a5, $a5, $a2 +; LA32-NEXT: xori $a5, $a5, 31 +; LA32-NEXT: srli.w $a6, $a0, 1 +; LA32-NEXT: srl.w $a5, $a6, $a5 +; LA32-NEXT: sub.w $a6, $zero, $a2 +; LA32-NEXT: sll.w $a7, $a0, $a6 +; LA32-NEXT: ori $a0, $zero, 32 +; LA32-NEXT: sub.w $t0, $a0, $a2 +; LA32-NEXT: srai.w $a0, $t0, 31 +; LA32-NEXT: and $a0, $a0, $a7 +; LA32-NEXT: or $a0, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a1, $a6 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: slti $a5, $t0, 0 +; LA32-NEXT: masknez $a6, $a7, $a5 +; LA32-NEXT: maskeqz $a3, $a3, $a5 +; LA32-NEXT: or $a3, $a3, $a6 +; LA32-NEXT: srl.w $a1, $a1, $a2 +; LA32-NEXT: srai.w $a2, $a4, 31 +; LA32-NEXT: and $a1, $a2, $a1 +; LA32-NEXT: or $a1, $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_64: +; LA64: # %bb.0: +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i64 64, %y + %b = lshr i64 %x, %y + %c = shl i64 %x, %z + %d = or i64 %b, %c + ret i64 %d +} + +define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotl_32_mask: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a1, $zero, $a1 +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_32_mask: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: sll.w $a1, $a0, $a1 +; LA64-NEXT: srl.w $a0, $a0, $a2 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: ret + %z = sub i32 0, %y + %and = and i32 %z, 31 + %b = shl i32 %x, %y + %c = lshr i32 %x, %and + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotl_32_mask_and_63_and_31: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a1, $zero, $a1 +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_32_mask_and_63_and_31: +; LA64: # %bb.0: +; 
LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: sll.w $a1, $a0, $a1 +; LA64-NEXT: srl.w $a0, $a0, $a2 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: ret + %a = and i32 %y, 63 + %b = shl i32 %x, %a + %c = sub i32 0, %y + %d = and i32 %c, 31 + %e = lshr i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotl_32_mask_or_64_or_32: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a1, $zero, $a1 +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_32_mask_or_64_or_32: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $zero, $a1 +; LA64-NEXT: sll.w $a1, $a0, $a1 +; LA64-NEXT: srl.w $a0, $a0, $a2 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: ret + %a = or i32 %y, 64 + %b = shl i32 %x, %a + %c = sub i32 0, %y + %d = or i32 %c, 32 + %e = lshr i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotr_32_mask: +; LA32: # %bb.0: +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_32_mask: +; LA64: # %bb.0: +; LA64-NEXT: rotr.w $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i32 0, %y + %and = and i32 %z, 31 + %b = lshr i32 %x, %y + %c = shl i32 %x, %and + %d = or i32 %b, %c + ret i32 %d +} + +define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotr_32_mask_and_63_and_31: +; LA32: # %bb.0: +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_32_mask_and_63_and_31: +; LA64: # %bb.0: +; LA64-NEXT: rotr.w $a0, $a0, $a1 +; LA64-NEXT: ret + %a = and i32 %y, 63 + %b = lshr i32 %x, %a + %c = sub i32 0, %y + %d = and i32 %c, 31 + %e = shl i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind { +; LA32-LABEL: rotr_32_mask_or_64_or_32: +; LA32: # %bb.0: +; LA32-NEXT: rotr.w $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_32_mask_or_64_or_32: +; LA64: # %bb.0: +; LA64-NEXT: rotr.w $a0, $a0, $a1 +; LA64-NEXT: ret 
+ %a = or i32 %y, 64 + %b = lshr i32 %x, %a + %c = sub i32 0, %y + %d = or i32 %c, 32 + %e = shl i32 %x, %d + %f = or i32 %b, %e + ret i32 %f +} + +define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotl_64_mask: +; LA32: # %bb.0: +; LA32-NEXT: xori $a3, $a2, 31 +; LA32-NEXT: srli.w $a4, $a0, 1 +; LA32-NEXT: srl.w $a3, $a4, $a3 +; LA32-NEXT: sll.w $a4, $a1, $a2 +; LA32-NEXT: or $a3, $a4, $a3 +; LA32-NEXT: sub.w $a4, $zero, $a2 +; LA32-NEXT: srl.w $a5, $a1, $a4 +; LA32-NEXT: andi $a6, $a4, 63 +; LA32-NEXT: addi.w $a7, $a6, -32 +; LA32-NEXT: srai.w $t0, $a7, 31 +; LA32-NEXT: and $a5, $t0, $a5 +; LA32-NEXT: addi.w $t0, $a2, -32 +; LA32-NEXT: slti $t1, $t0, 0 +; LA32-NEXT: maskeqz $a3, $a3, $t1 +; LA32-NEXT: sll.w $t2, $a0, $t0 +; LA32-NEXT: masknez $t1, $t2, $t1 +; LA32-NEXT: or $a3, $a3, $t1 +; LA32-NEXT: xori $a6, $a6, 31 +; LA32-NEXT: slli.w $t1, $a1, 1 +; LA32-NEXT: sll.w $a6, $t1, $a6 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: srl.w $a4, $a0, $a4 +; LA32-NEXT: or $a4, $a4, $a6 +; LA32-NEXT: srl.w $a1, $a1, $a7 +; LA32-NEXT: slti $a5, $a7, 0 +; LA32-NEXT: masknez $a1, $a1, $a5 +; LA32-NEXT: maskeqz $a4, $a4, $a5 +; LA32-NEXT: or $a1, $a4, $a1 +; LA32-NEXT: sll.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $t0, 31 +; LA32-NEXT: and $a0, $a2, $a0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_64_mask: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a1, $zero, $a1 +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i64 0, %y + %and = and i64 %z, 63 + %b = shl i64 %x, %y + %c = lshr i64 %x, %and + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotl_64_mask_and_127_and_63: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a3, $a0, 1 +; LA32-NEXT: andi $a4, $a2, 127 +; LA32-NEXT: xori $a5, $a4, 31 +; LA32-NEXT: srl.w $a3, $a3, $a5 +; LA32-NEXT: sll.w $a5, $a1, $a2 +; LA32-NEXT: or $a3, $a5, $a3 +; LA32-NEXT: sub.w $a5, $zero, $a2 +; 
LA32-NEXT: srl.w $a6, $a1, $a5 +; LA32-NEXT: andi $a7, $a5, 63 +; LA32-NEXT: addi.w $t0, $a7, -32 +; LA32-NEXT: srai.w $t1, $t0, 31 +; LA32-NEXT: and $a6, $t1, $a6 +; LA32-NEXT: addi.w $a4, $a4, -32 +; LA32-NEXT: slti $t1, $a4, 0 +; LA32-NEXT: maskeqz $a3, $a3, $t1 +; LA32-NEXT: sll.w $t2, $a0, $a4 +; LA32-NEXT: masknez $t1, $t2, $t1 +; LA32-NEXT: or $a3, $a3, $t1 +; LA32-NEXT: xori $a7, $a7, 31 +; LA32-NEXT: slli.w $t1, $a1, 1 +; LA32-NEXT: sll.w $a7, $t1, $a7 +; LA32-NEXT: or $a3, $a3, $a6 +; LA32-NEXT: srl.w $a5, $a0, $a5 +; LA32-NEXT: or $a5, $a5, $a7 +; LA32-NEXT: srl.w $a1, $a1, $t0 +; LA32-NEXT: slti $a6, $t0, 0 +; LA32-NEXT: masknez $a1, $a1, $a6 +; LA32-NEXT: maskeqz $a5, $a5, $a6 +; LA32-NEXT: or $a1, $a5, $a1 +; LA32-NEXT: sll.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $a4, 31 +; LA32-NEXT: and $a0, $a2, $a0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_64_mask_and_127_and_63: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a1, $zero, $a1 +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %a = and i64 %y, 127 + %b = shl i64 %x, %a + %c = sub i64 0, %y + %d = and i64 %c, 63 + %e = lshr i64 %x, %d + %f = or i64 %b, %e + ret i64 %f +} + +define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotl_64_mask_or_128_or_64: +; LA32: # %bb.0: +; LA32-NEXT: sll.w $a3, $a0, $a2 +; LA32-NEXT: sub.w $a0, $zero, $a2 +; LA32-NEXT: srl.w $a0, $a1, $a0 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_64_mask_or_128_or_64: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a1, $zero, $a1 +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %a = or i64 %y, 128 + %b = shl i64 %x, %a + %c = sub i64 0, %y + %d = or i64 %c, 64 + %e = lshr i64 %x, %d + %f = or i64 %b, %e + ret i64 %f +} + +define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotr_64_mask: +; LA32: # %bb.0: +; LA32-NEXT: xori $a3, $a2, 31 +; LA32-NEXT: slli.w $a4, $a1, 1 +; LA32-NEXT: sll.w $a3, $a4, $a3 +; LA32-NEXT: 
srl.w $a4, $a0, $a2 +; LA32-NEXT: or $a3, $a4, $a3 +; LA32-NEXT: sub.w $a4, $zero, $a2 +; LA32-NEXT: sll.w $a5, $a0, $a4 +; LA32-NEXT: andi $a6, $a4, 63 +; LA32-NEXT: addi.w $a7, $a6, -32 +; LA32-NEXT: srai.w $t0, $a7, 31 +; LA32-NEXT: and $a5, $t0, $a5 +; LA32-NEXT: addi.w $t0, $a2, -32 +; LA32-NEXT: slti $t1, $t0, 0 +; LA32-NEXT: maskeqz $a3, $a3, $t1 +; LA32-NEXT: srl.w $t2, $a1, $t0 +; LA32-NEXT: masknez $t1, $t2, $t1 +; LA32-NEXT: or $a3, $a3, $t1 +; LA32-NEXT: xori $a6, $a6, 31 +; LA32-NEXT: srli.w $t1, $a0, 1 +; LA32-NEXT: srl.w $a6, $t1, $a6 +; LA32-NEXT: or $a3, $a3, $a5 +; LA32-NEXT: sll.w $a4, $a1, $a4 +; LA32-NEXT: or $a4, $a4, $a6 +; LA32-NEXT: sll.w $a0, $a0, $a7 +; LA32-NEXT: slti $a5, $a7, 0 +; LA32-NEXT: masknez $a0, $a0, $a5 +; LA32-NEXT: maskeqz $a4, $a4, $a5 +; LA32-NEXT: or $a0, $a4, $a0 +; LA32-NEXT: srl.w $a1, $a1, $a2 +; LA32-NEXT: srai.w $a2, $t0, 31 +; LA32-NEXT: and $a1, $a2, $a1 +; LA32-NEXT: or $a1, $a1, $a0 +; LA32-NEXT: move $a0, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_64_mask: +; LA64: # %bb.0: +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %z = sub i64 0, %y + %and = and i64 %z, 63 + %b = lshr i64 %x, %y + %c = shl i64 %x, %and + %d = or i64 %b, %c + ret i64 %d +} + +define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotr_64_mask_and_127_and_63: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a1, 1 +; LA32-NEXT: andi $a4, $a2, 127 +; LA32-NEXT: xori $a5, $a4, 31 +; LA32-NEXT: sll.w $a3, $a3, $a5 +; LA32-NEXT: srl.w $a5, $a0, $a2 +; LA32-NEXT: or $a3, $a5, $a3 +; LA32-NEXT: sub.w $a5, $zero, $a2 +; LA32-NEXT: sll.w $a6, $a0, $a5 +; LA32-NEXT: andi $a7, $a5, 63 +; LA32-NEXT: addi.w $t0, $a7, -32 +; LA32-NEXT: srai.w $t1, $t0, 31 +; LA32-NEXT: and $a6, $t1, $a6 +; LA32-NEXT: addi.w $a4, $a4, -32 +; LA32-NEXT: slti $t1, $a4, 0 +; LA32-NEXT: maskeqz $a3, $a3, $t1 +; LA32-NEXT: srl.w $t2, $a1, $a4 +; LA32-NEXT: masknez $t1, $t2, $t1 +; LA32-NEXT: or $a3, $a3, $t1 +; LA32-NEXT: xori $a7, $a7, 31 +; 
LA32-NEXT: srli.w $t1, $a0, 1 +; LA32-NEXT: srl.w $a7, $t1, $a7 +; LA32-NEXT: or $a3, $a3, $a6 +; LA32-NEXT: sll.w $a5, $a1, $a5 +; LA32-NEXT: or $a5, $a5, $a7 +; LA32-NEXT: sll.w $a0, $a0, $t0 +; LA32-NEXT: slti $a6, $t0, 0 +; LA32-NEXT: masknez $a0, $a0, $a6 +; LA32-NEXT: maskeqz $a5, $a5, $a6 +; LA32-NEXT: or $a0, $a5, $a0 +; LA32-NEXT: srl.w $a1, $a1, $a2 +; LA32-NEXT: srai.w $a2, $a4, 31 +; LA32-NEXT: and $a1, $a2, $a1 +; LA32-NEXT: or $a1, $a1, $a0 +; LA32-NEXT: move $a0, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_64_mask_and_127_and_63: +; LA64: # %bb.0: +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %a = and i64 %y, 127 + %b = lshr i64 %x, %a + %c = sub i64 0, %y + %d = and i64 %c, 63 + %e = shl i64 %x, %d + %f = or i64 %b, %e + ret i64 %f +} + +define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { +; LA32-LABEL: rotr_64_mask_or_128_or_64: +; LA32: # %bb.0: +; LA32-NEXT: srl.w $a3, $a1, $a2 +; LA32-NEXT: sub.w $a1, $zero, $a2 +; LA32-NEXT: sll.w $a1, $a0, $a1 +; LA32-NEXT: move $a0, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_64_mask_or_128_or_64: +; LA64: # %bb.0: +; LA64-NEXT: rotr.d $a0, $a0, $a1 +; LA64-NEXT: ret + %a = or i64 %y, 128 + %b = lshr i64 %x, %a + %c = sub i64 0, %y + %d = or i64 %c, 64 + %e = shl i64 %x, %d + %f = or i64 %b, %e + ret i64 %f +} + +define i32 @rotri_i32(i32 %a) nounwind { +; LA32-LABEL: rotri_i32: +; LA32: # %bb.0: +; LA32-NEXT: rotri.w $a0, $a0, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: rotri_i32: +; LA64: # %bb.0: +; LA64-NEXT: rotri.w $a0, $a0, 16 +; LA64-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i64 @rotri_i64(i64 %a) nounwind { +; LA32-LABEL: rotri_i64: +; LA32: # %bb.0: +; LA32-NEXT: move $a2, $a0 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: move $a1, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: rotri_i64: +; LA64: # %bb.0: +; LA64-NEXT: rotri.d $a0, $a0, 32 +; LA64-NEXT: ret + %shl = shl i64 %a, 32 + %shr = lshr i64 %a, 32 + %or = or i64 
%shl, %shr + ret i64 %or +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define signext i32 @rotl_i32_fshl(i32 signext %a) nounwind { +; LA32-LABEL: rotl_i32_fshl: +; LA32: # %bb.0: +; LA32-NEXT: rotri.w $a0, $a0, 20 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_i32_fshl: +; LA64: # %bb.0: +; LA64-NEXT: rotri.w $a0, $a0, 20 +; LA64-NEXT: ret + %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 12) + ret i32 %or +} + +define i64 @rotl_i64_fshl(i64 %a) nounwind { +; LA32-LABEL: rotl_i64_fshl: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a2, $a1, 20 +; LA32-NEXT: slli.w $a3, $a0, 12 +; LA32-NEXT: or $a2, $a3, $a2 +; LA32-NEXT: srli.w $a0, $a0, 20 +; LA32-NEXT: slli.w $a1, $a1, 12 +; LA32-NEXT: or $a1, $a1, $a0 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_i64_fshl: +; LA64: # %bb.0: +; LA64-NEXT: rotri.d $a0, $a0, 52 +; LA64-NEXT: ret + %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 12) + ret i64 %or +} + +define signext i32 @rotr_i32_fshr(i32 signext %a) nounwind { +; LA32-LABEL: rotr_i32_fshr: +; LA32: # %bb.0: +; LA32-NEXT: rotri.w $a0, $a0, 12 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_i32_fshr: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 20 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 12 +; LA64-NEXT: or $a0, $a0, $a1 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: ret + %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 12) + ret i32 %or +} + +define i64 @rotr_i64_fshr(i64 %a) nounwind { +; LA32-LABEL: rotr_i64_fshr: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a2, $a0, 12 +; LA32-NEXT: slli.w $a3, $a1, 20 +; LA32-NEXT: or $a2, $a3, $a2 +; LA32-NEXT: srli.w $a1, $a1, 12 +; LA32-NEXT: slli.w $a0, $a0, 20 +; LA32-NEXT: or $a1, $a0, $a1 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_i64_fshr: +; LA64: # %bb.0: +; LA64-NEXT: rotri.d $a0, $a0, 12 +; LA64-NEXT: ret + %or = tail call i64 
@llvm.fshr.i64(i64 %a, i64 %a, i64 12) + ret i64 %or +} diff --git a/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll new file mode 100644 index 0000000000000000000000000000000000000000..61fe123ee6aac54e7e5c68f4a5020c646a7da277 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/select-to-shiftand.ll @@ -0,0 +1,234 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +;; Compare if positive and select variable or zero. +define i8 @pos_sel_variable_and_zero_i8(i8 signext %a, i8 signext %b) { +; LA32-LABEL: pos_sel_variable_and_zero_i8: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a0, $a0, 7 +; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: pos_sel_variable_and_zero_i8: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a0, $a0, 7 +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: ret + %cmp = icmp sgt i8 %a, -1 + %sel = select i1 %cmp, i8 %b, i8 0 + ret i8 %sel +} + +define i16 @pos_sel_variable_and_zero_i16(i16 signext %a, i16 signext %b) { +; LA32-LABEL: pos_sel_variable_and_zero_i16: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a0, $a0, 15 +; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: pos_sel_variable_and_zero_i16: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a0, $a0, 15 +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: ret + %cmp = icmp sgt i16 %a, -1 + %sel = select i1 %cmp, i16 %b, i16 0 + ret i16 %sel +} + +define i32 @pos_sel_variable_and_zero_i32(i32 signext %a, i32 signext %b) { +; LA32-LABEL: pos_sel_variable_and_zero_i32: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a0, $a0, 31 +; LA32-NEXT: andn $a0, $a1, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: pos_sel_variable_and_zero_i32: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a0, $a0, 31 +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: ret + %cmp = icmp sgt i32 %a, -1 + %sel = select i1 
%cmp, i32 %b, i32 0 + ret i32 %sel +} + +define i64 @pos_sel_variable_and_zero_i64(i64 signext %a, i64 signext %b) { +; LA32-LABEL: pos_sel_variable_and_zero_i64: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a1, $a1, 31 +; LA32-NEXT: andn $a0, $a2, $a1 +; LA32-NEXT: andn $a1, $a3, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: pos_sel_variable_and_zero_i64: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a0, $a0, 63 +; LA64-NEXT: andn $a0, $a1, $a0 +; LA64-NEXT: ret + %cmp = icmp sgt i64 %a, -1 + %sel = select i1 %cmp, i64 %b, i64 0 + ret i64 %sel +} + +;; Compare if not negative or zero and select the same variable as being +;; compared: smax(a, 0). +define i8 @not_neg_not_zero_sel_same_variable_i8(i8 signext %a) { +; LA32-LABEL: not_neg_not_zero_sel_same_variable_i8: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a1, $a0, 7 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: not_neg_not_zero_sel_same_variable_i8: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a1, $a0, 7 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %cmp = icmp sgt i8 %a, 0 + %sel = select i1 %cmp, i8 %a, i8 0 + ret i8 %sel +} + +define i16 @not_neg_not_zero_sel_same_variable_i16(i16 signext %a) { +; LA32-LABEL: not_neg_not_zero_sel_same_variable_i16: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a1, $a0, 15 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: not_neg_not_zero_sel_same_variable_i16: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a1, $a0, 15 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %cmp = icmp sgt i16 %a, 0 + %sel = select i1 %cmp, i16 %a, i16 0 + ret i16 %sel +} + +define i32 @not_neg_not_zero_sel_same_variable_i32(i32 signext %a) { +; LA32-LABEL: not_neg_not_zero_sel_same_variable_i32: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a1, $a0, 31 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: not_neg_not_zero_sel_same_variable_i32: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a1, $a0, 31 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %cmp = icmp sgt i32 %a, 0 + 
%sel = select i1 %cmp, i32 %a, i32 0 + ret i32 %sel +} + +define i64 @not_neg_not_zero_sel_same_variable_i64(i64 signext %a) { +; LA32-LABEL: not_neg_not_zero_sel_same_variable_i64: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a2, $a1, 31 +; LA32-NEXT: andn $a0, $a0, $a2 +; LA32-NEXT: andn $a1, $a1, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: not_neg_not_zero_sel_same_variable_i64: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a1, $a0, 63 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %cmp = icmp sgt i64 %a, 0 + %sel = select i1 %cmp, i64 %a, i64 0 + ret i64 %sel +} + +;; ret = (x-y) > 0 ? x-y : 0 +define i8 @sub_clamp_zero_i8(i8 signext %x, i8 signext %y) { +; LA32-LABEL: sub_clamp_zero_i8: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: ext.w.b $a1, $a0 +; LA32-NEXT: srai.w $a1, $a1, 7 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: sub_clamp_zero_i8: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: ext.w.b $a1, $a0 +; LA64-NEXT: srai.d $a1, $a1, 7 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %sub = sub nsw i8 %x, %y + %cmp = icmp sgt i8 %sub, 0 + %sel = select i1 %cmp, i8 %sub, i8 0 + ret i8 %sel +} + +define i16 @sub_clamp_zero_i16(i16 signext %x, i16 signext %y) { +; LA32-LABEL: sub_clamp_zero_i16: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: ext.w.h $a1, $a0 +; LA32-NEXT: srai.w $a1, $a1, 15 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: sub_clamp_zero_i16: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: ext.w.h $a1, $a0 +; LA64-NEXT: srai.d $a1, $a1, 15 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %sub = sub nsw i16 %x, %y + %cmp = icmp sgt i16 %sub, 0 + %sel = select i1 %cmp, i16 %sub, i16 0 + ret i16 %sel +} + +define i32 @sub_clamp_zero_i32(i32 signext %x, i32 signext %y) { +; LA32-LABEL: sub_clamp_zero_i32: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a0, $a0, $a1 +; LA32-NEXT: srai.w $a1, $a0, 31 +; LA32-NEXT: andn $a0, $a0, $a1 +; 
LA32-NEXT: ret +; +; LA64-LABEL: sub_clamp_zero_i32: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a2, $a0, $a1 +; LA64-NEXT: sub.w $a0, $a0, $a1 +; LA64-NEXT: srai.d $a0, $a0, 31 +; LA64-NEXT: andn $a0, $a2, $a0 +; LA64-NEXT: ret + %sub = sub nsw i32 %x, %y + %cmp = icmp sgt i32 %sub, 0 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i64 @sub_clamp_zero_i64(i64 signext %x, i64 signext %y) { +; LA32-LABEL: sub_clamp_zero_i64: +; LA32: # %bb.0: +; LA32-NEXT: sub.w $a1, $a1, $a3 +; LA32-NEXT: sltu $a3, $a0, $a2 +; LA32-NEXT: sub.w $a1, $a1, $a3 +; LA32-NEXT: sub.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $a1, 31 +; LA32-NEXT: andn $a1, $a1, $a2 +; LA32-NEXT: andn $a0, $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: sub_clamp_zero_i64: +; LA64: # %bb.0: +; LA64-NEXT: sub.d $a0, $a0, $a1 +; LA64-NEXT: srai.d $a1, $a0, 63 +; LA64-NEXT: andn $a0, $a0, $a1 +; LA64-NEXT: ret + %sub = sub nsw i64 %x, %y + %cmp = icmp sgt i64 %sub, 0 + %sel = select i1 %cmp, i64 %sub, i64 0 + ret i64 %sel +} diff --git a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll index 1878e0ed2424052bdb8aaf2c0bd9821d20c16dbf..e151624d908c2ca8af72e4cf55def12424eef029 100644 --- a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll +++ b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 @@ -9,12 +10,12 @@ define i32 @sll_redundant_mask(i32 %a, i32 %b) { ; LA32-LABEL: sll_redundant_mask: ; LA32: # %bb.0: ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sll_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and i32 %b, 31 %2 = shl i32 %a, %1 ret i32 %2 @@ -25,13 +26,13 @@ define i32 
@sll_non_redundant_mask(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a1, $a1, 15 ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sll_non_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: andi $a1, $a1, 15 ; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and i32 %b, 15 %2 = shl i32 %a, %1 ret i32 %2 @@ -41,12 +42,12 @@ define i32 @srl_redundant_mask(i32 %a, i32 %b) { ; LA32-LABEL: srl_redundant_mask: ; LA32: # %bb.0: ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srl_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and i32 %b, 4095 %2 = lshr i32 %a, %1 ret i32 %2 @@ -57,13 +58,13 @@ define i32 @srl_non_redundant_mask(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a1, $a1, 7 ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srl_non_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: andi $a1, $a1, 7 ; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and i32 %b, 7 %2 = lshr i32 %a, %1 ret i32 %2 @@ -73,12 +74,12 @@ define i32 @sra_redundant_mask(i32 %a, i32 %b) { ; LA32-LABEL: sra_redundant_mask: ; LA32: # %bb.0: ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sra_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and i32 %b, 65535 %2 = ashr i32 %a, %1 ret i32 %2 @@ -89,13 +90,13 @@ define i32 @sra_non_redundant_mask(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: andi $a1, $a1, 32 ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sra_non_redundant_mask: ; LA64: # %bb.0: ; LA64-NEXT: andi $a1, $a1, 32 ; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = and 
i32 %b, 32 %2 = ashr i32 %a, %1 ret i32 %2 @@ -106,13 +107,13 @@ define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a1, $a1, 1 ; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sll_redundant_mask_zeros: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 1 ; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i32 %b, 1 %2 = and i32 %1, 30 %3 = shl i32 %a, %2 @@ -124,13 +125,13 @@ define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a1, $a1, 2 ; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srl_redundant_mask_zeros: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 2 ; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i32 %b, 2 %2 = and i32 %1, 28 %3 = lshr i32 %a, %2 @@ -142,13 +143,13 @@ define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) { ; LA32: # %bb.0: ; LA32-NEXT: slli.w $a1, $a1, 3 ; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sra_redundant_mask_zeros: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 3 ; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i32 %b, 3 %2 = and i32 %1, 24 %3 = ashr i32 %a, %2 @@ -174,13 +175,13 @@ define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) { ; LA32-NEXT: sll.w $a0, $a0, $a2 ; LA32-NEXT: srai.w $a2, $a3, 31 ; LA32-NEXT: and $a0, $a2, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sll_redundant_mask_zeros_i64: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 2 ; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i64 %b, 2 %2 = and i64 %1, 60 %3 = shl i64 %a, %2 @@ -206,13 +207,13 @@ define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) { ; LA32-NEXT: srl.w $a1, $a1, $a2 ; LA32-NEXT: srai.w $a2, $a3, 31 ; 
LA32-NEXT: and $a1, $a2, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: srl_redundant_mask_zeros_i64: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 3 ; LA64-NEXT: srl.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i64 %b, 3 %2 = and i64 %1, 56 %3 = lshr i64 %a, %2 @@ -241,13 +242,13 @@ define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) { ; LA32-NEXT: masknez $a1, $a1, $a6 ; LA32-NEXT: or $a0, $a0, $a1 ; LA32-NEXT: move $a1, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 +; LA32-NEXT: ret ; ; LA64-LABEL: sra_redundant_mask_zeros_i64: ; LA64: # %bb.0: ; LA64-NEXT: slli.d $a1, $a1, 4 ; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 +; LA64-NEXT: ret %1 = shl i64 %b, 4 %2 = and i64 %1, 48 %3 = ashr i64 %a, %2 diff --git a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll new file mode 100644 index 0000000000000000000000000000000000000000..d12cbaaabb98634f76506efc98b2ec20dde5a4e4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define i32 @fptosi_i32_fp128(fp128 %X) nounwind { +; LA32-LABEL: fptosi_i32_fp128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: ld.w $a1, $a0, 12 +; LA32-NEXT: st.w $a1, $sp, 20 +; LA32-NEXT: ld.w $a1, $a0, 8 +; LA32-NEXT: st.w $a1, $sp, 16 +; LA32-NEXT: ld.w $a1, $a0, 4 +; LA32-NEXT: st.w $a1, $sp, 12 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: st.w $a0, $sp, 8 +; LA32-NEXT: addi.w $a0, $sp, 8 +; LA32-NEXT: bl %plt(__fixtfsi) +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i32_fp128: +; LA64: # %bb.0: +; LA64-NEXT: 
addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(__fixtfsi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi fp128 %X to i32 + ret i32 %tmp +} + +define i32 @fptosi_i32_double(double %X) nounwind { +; LA32-LABEL: fptosi_i32_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__fixdfsi) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i32_double: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(__fixdfsi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi double %X to i32 + ret i32 %tmp +} + +define i32 @fptosi_i32_float(float %X) nounwind { +; LA32-LABEL: fptosi_i32_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__fixsfsi) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i32_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: bl %plt(__fixsfsi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi float %X to i32 + ret i32 %tmp +} + +define i64 @fptosi_i64_fp128(fp128 %X) nounwind { +; LA32-LABEL: fptosi_i64_fp128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: ld.w $a1, $a0, 12 +; LA32-NEXT: st.w $a1, $sp, 12 +; LA32-NEXT: ld.w $a1, $a0, 8 +; LA32-NEXT: st.w $a1, $sp, 8 +; LA32-NEXT: ld.w $a1, $a0, 4 +; 
LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(__fixtfdi) +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i64_fp128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(__fixtfdi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi fp128 %X to i64 + ret i64 %tmp +} + +define i64 @fptosi_i64_double(double %X) nounwind { +; LA32-LABEL: fptosi_i64_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__fixdfdi) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i64_double: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(__fixdfdi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi double %X to i64 + ret i64 %tmp +} + +define i64 @fptosi_i64_float(float %X) nounwind { +; LA32-LABEL: fptosi_i64_float: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__fixsfdi) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: fptosi_i64_float: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: bl %plt(__fixsfdi) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %tmp = fptosi float %X to i64 + ret i64 %tmp +} diff --git 
a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll new file mode 100644 index 0000000000000000000000000000000000000000..d800befef0ba7fa91d3e05699989ec0d66bc6d6b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +;; This test case is reduced from pr17377.c of the GCC C Torture Suite using +;; bugpoint. + +@calls = external dso_local global i32, align 4 +declare ptr @llvm.returnaddress(i32 immarg) + +define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: addi.d $fp, $sp, 48 +; CHECK-NEXT: .cfi_def_cfa 22, 0 +; CHECK-NEXT: st.d $ra, $fp, -40 # 8-byte Folded Spill +; CHECK-NEXT: move $a1, $a0 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(calls) +; CHECK-NEXT: addi.d $a3, $a0, %pc_lo12(calls) +; CHECK-NEXT: ld.wu $a0, $a3, 0 +; CHECK-NEXT: addi.d $a2, $a0, 1 +; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: st.w $a1, $fp, -28 +; CHECK-NEXT: bnez $a0, .LBB0_2 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: # %if.then +; CHECK-NEXT: ld.d $a0, $fp, -40 # 8-byte Folded Reload +; CHECK-NEXT: st.d $a0, $fp, -24 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: ld.wu $a0, $fp, -28 +; CHECK-NEXT: st.d $a0, $fp, -48 # 8-byte Folded Spill +; CHECK-NEXT: beqz $a0, .LBB0_5 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: ld.d $a0, $fp, -48 # 8-byte Folded Reload +; CHECK-NEXT: ori $a1, $zero, 1 +; CHECK-NEXT: bne $a0, $a1, .LBB0_6 +; CHECK-NEXT: b .LBB0_4 +; 
CHECK-NEXT: .LBB0_4: # %sw.bb +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(f) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(f) +; CHECK-NEXT: st.d $a0, $fp, -24 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_5: # %sw.bb1 +; CHECK-NEXT: ld.d $a0, $fp, -40 # 8-byte Folded Reload +; CHECK-NEXT: st.d $a0, $fp, -24 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_6: # %sw.epilog +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: st.d $a0, $fp, -24 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_7: # %return +; CHECK-NEXT: ld.d $a0, $fp, -24 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %retval = alloca ptr, align 8 + %i.addr = alloca i32, align 4 + store i32 %i, ptr %i.addr, align 4 + %0 = load i32, ptr @calls, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, ptr @calls, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %1 = call ptr @llvm.returnaddress(i32 0) + store ptr %1, ptr %retval, align 8 + br label %return + +if.end: + %2 = load i32, ptr %i.addr, align 4 + switch i32 %2, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb1 + ] + +sw.bb: + store ptr @f, ptr %retval, align 8 + br label %return + +sw.bb1: + %3 = call ptr @llvm.returnaddress(i32 0) + store ptr %3, ptr %retval, align 8 + br label %return + +sw.epilog: + store ptr null, ptr %retval, align 8 + br label %return + +return: + %4 = load ptr, ptr %retval, align 8 + ret ptr %4 +} diff --git a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll new file mode 100644 index 0000000000000000000000000000000000000000..63407ad003f694f87a45e67a01162b6463628935 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; 
RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +;; Check the $fcc* register is spilled before funtion call and then reloaded. +declare void @foo() + +define i1 @load_store_fcc_reg(float %a, i1 %c) { +; LA32-LABEL: load_store_fcc_reg: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: .cfi_def_cfa_offset 32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 56, -16 +; LA32-NEXT: .cfi_offset 57, -24 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: fmov.s $fs0, $fa0 +; LA32-NEXT: movgr2fr.w $fs1, $zero +; LA32-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: st.w $a0, $sp, 4 +; LA32-NEXT: bl %plt(foo) +; LA32-NEXT: ld.w $a0, $sp, 4 +; LA32-NEXT: movgr2cf $fcc0, $a0 +; LA32-NEXT: bcnez $fcc0, .LBB0_2 +; LA32-NEXT: # %bb.1: # %if.then +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: b .LBB0_3 +; LA32-NEXT: .LBB0_2: # %if.else +; LA32-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: .LBB0_3: # %if.then +; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: load_store_fcc_reg: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: .cfi_def_cfa_offset 48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: .cfi_offset 56, -24 
+; LA64-NEXT: .cfi_offset 57, -32 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: fmov.s $fs0, $fa0 +; LA64-NEXT: movgr2fr.w $fs1, $zero +; LA64-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: st.d $a0, $sp, 8 +; LA64-NEXT: bl %plt(foo) +; LA64-NEXT: ld.d $a0, $sp, 8 +; LA64-NEXT: movgr2cf $fcc0, $a0 +; LA64-NEXT: bcnez $fcc0, .LBB0_2 +; LA64-NEXT: # %bb.1: # %if.then +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: b .LBB0_3 +; LA64-NEXT: .LBB0_2: # %if.else +; LA64-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: .LBB0_3: # %if.then +; LA64-NEXT: fld.d $fs1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret + %cmp = fcmp ole float %a, 0.000000e+00 + call void @foo() + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i1 %c + +if.else: + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/LoongArch/split-sp-adjust.ll b/llvm/test/CodeGen/LoongArch/split-sp-adjust.ll new file mode 100644 index 0000000000000000000000000000000000000000..8217336637da28bd614471284d8555967817d138 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/split-sp-adjust.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +;; The stack size is 2048 and the SP adjustment will be split. 
+define i32 @SplitSP() nounwind { +; CHECK-LABEL: SplitSP: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -2032 +; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: addi.d $a0, $sp, 12 +; CHECK-NEXT: bl %plt(foo) +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: ret +entry: + %xx = alloca [2028 x i8], align 1 + %0 = getelementptr inbounds [2028 x i8], ptr %xx, i32 0, i32 0 + %call = call i32 @foo(ptr nonnull %0) + ret i32 0 +} + +;; The stack size is 2032 and the SP adjustment will not be split. +;; 2016 + 8(RA) + 8(emergency spill slot) = 2032 +define i32 @NoSplitSP() nounwind { +; CHECK-LABEL: NoSplitSP: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -2032 +; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $a0, $sp, 8 +; CHECK-NEXT: bl %plt(foo) +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 2032 +; CHECK-NEXT: ret +entry: + %xx = alloca [2016 x i8], align 1 + %0 = getelementptr inbounds [2024 x i8], ptr %xx, i32 0, i32 0 + %call = call i32 @foo(ptr nonnull %0) + ret i32 0 +} + +declare i32 @foo(ptr) diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll new file mode 100644 index 0000000000000000000000000000000000000000..667a7aff8b9b7ce528b098bcb57886127a89cb3e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s 
--check-prefix=LA64 + +declare void @callee(ptr, ptr) + +define void @caller(i32 %n) { +; LA32-LABEL: caller: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -64 +; LA32-NEXT: .cfi_def_cfa_offset 64 +; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 31, -12 +; LA32-NEXT: addi.w $fp, $sp, 64 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a1, $sp, 6 +; LA32-NEXT: slli.w $sp, $a1, 6 +; LA32-NEXT: move $s8, $sp +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: addi.w $a1, $zero, -16 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: sub.w $a0, $sp, $a0 +; LA32-NEXT: move $sp, $a0 +; LA32-NEXT: addi.w $a1, $s8, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -64 +; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 64 +; LA32-NEXT: ret +; +; LA64-LABEL: caller: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -64 +; LA64-NEXT: .cfi_def_cfa_offset 64 +; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: st.d $s8, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: .cfi_offset 31, -24 +; LA64-NEXT: addi.d $fp, $sp, 64 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a1, $sp, 6 +; LA64-NEXT: slli.d $sp, $a1, 6 +; LA64-NEXT: move $s8, $sp +; LA64-NEXT: addi.w $a1, $zero, -16 +; LA64-NEXT: lu32i.d $a1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: addi.d $a0, $a0, 15 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: sub.d $a0, $sp, $a0 +; LA64-NEXT: move $sp, $a0 +; LA64-NEXT: addi.d $a1, $s8, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -64 +; LA64-NEXT: ld.d $s8, 
$sp, 40 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 64 +; LA64-NEXT: ret + %1 = alloca i8, i32 %n + %2 = alloca i32, align 64 + call void @callee(ptr %1, ptr %2) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment.ll b/llvm/test/CodeGen/LoongArch/stack-realignment.ll new file mode 100644 index 0000000000000000000000000000000000000000..a9f3fc4e2a0e5002ab1c85d100b0f676cfc2c4af --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stack-realignment.ll @@ -0,0 +1,631 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare void @callee(ptr) + +define void @caller32() { +; LA32-LABEL: caller32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: .cfi_def_cfa_offset 32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 32 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 5 +; LA32-NEXT: slli.w $sp, $a0, 5 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -32 +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: caller32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: .cfi_def_cfa_offset 32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 32 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; 
LA64-NEXT: srli.d $a0, $sp, 5 +; LA64-NEXT: slli.d $sp, $a0, 5 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -32 +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign32() "no-realign-stack" { +; LA32-LABEL: caller_no_realign32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(ptr %1) + ret void +} + +define void @caller64() { +; LA32-LABEL: caller64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -64 +; LA32-NEXT: .cfi_def_cfa_offset 64 +; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 64 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 6 +; LA32-NEXT: slli.w $sp, $a0, 6 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -64 +; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 64 +; LA32-NEXT: ret +; 
+; LA64-LABEL: caller64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -64 +; LA64-NEXT: .cfi_def_cfa_offset 64 +; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 64 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 6 +; LA64-NEXT: slli.d $sp, $a0, 6 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -64 +; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 64 +; LA64-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign64() "no-realign-stack" { +; LA32-LABEL: caller_no_realign64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(ptr %1) + ret void +} + +define void @caller128() { +; LA32-LABEL: caller128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: .cfi_def_cfa_offset 128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 
128 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 7 +; LA32-NEXT: slli.w $sp, $a0, 7 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: caller128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -128 +; LA64-NEXT: .cfi_def_cfa_offset 128 +; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 128 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 7 +; LA64-NEXT: slli.d $sp, $a0, 7 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -128 +; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 128 +; LA64-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign128() "no-realign-stack" { +; LA32-LABEL: caller_no_realign128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 128 + call void 
@callee(ptr %1) + ret void +} + +define void @caller256() { +; LA32-LABEL: caller256: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -256 +; LA32-NEXT: .cfi_def_cfa_offset 256 +; LA32-NEXT: st.w $ra, $sp, 252 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 248 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 256 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 8 +; LA32-NEXT: slli.w $sp, $a0, 8 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -256 +; LA32-NEXT: ld.w $fp, $sp, 248 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 252 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 256 +; LA32-NEXT: ret +; +; LA64-LABEL: caller256: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -256 +; LA64-NEXT: .cfi_def_cfa_offset 256 +; LA64-NEXT: st.d $ra, $sp, 248 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 240 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 256 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 8 +; LA64-NEXT: slli.d $sp, $a0, 8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -256 +; LA64-NEXT: ld.d $fp, $sp, 240 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 248 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 256 +; LA64-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign256() "no-realign-stack" { +; LA32-LABEL: caller_no_realign256: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign256: 
+; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(ptr %1) + ret void +} + +define void @caller512() { +; LA32-LABEL: caller512: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -1024 +; LA32-NEXT: .cfi_def_cfa_offset 1024 +; LA32-NEXT: st.w $ra, $sp, 1020 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 1016 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 1024 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 9 +; LA32-NEXT: slli.w $sp, $a0, 9 +; LA32-NEXT: addi.w $a0, $sp, 512 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -1024 +; LA32-NEXT: ld.w $fp, $sp, 1016 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 1020 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 1024 +; LA32-NEXT: ret +; +; LA64-LABEL: caller512: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -1024 +; LA64-NEXT: .cfi_def_cfa_offset 1024 +; LA64-NEXT: st.d $ra, $sp, 1016 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 1008 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 1024 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 9 +; LA64-NEXT: slli.d $sp, $a0, 9 +; LA64-NEXT: addi.d $a0, $sp, 512 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -1024 +; LA64-NEXT: ld.d $fp, $sp, 1008 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 1016 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 1024 +; LA64-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign512() "no-realign-stack" { +; LA32-LABEL: 
caller_no_realign512: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign512: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(ptr %1) + ret void +} + +define void @caller1024() { +; LA32-LABEL: caller1024: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -2032 +; LA32-NEXT: .cfi_def_cfa_offset 2032 +; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 2024 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 2032 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: srli.w $a0, $sp, 10 +; LA32-NEXT: slli.w $sp, $a0, 10 +; LA32-NEXT: addi.w $a0, $sp, 1024 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -2048 +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 2032 +; LA32-NEXT: ret +; +; LA64-LABEL: caller1024: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -2032 +; LA64-NEXT: .cfi_def_cfa_offset 2032 +; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 2032 +; LA64-NEXT: .cfi_def_cfa 22, 0 
+; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: srli.d $a0, $sp, 10 +; LA64-NEXT: slli.d $sp, $a0, 10 +; LA64-NEXT: addi.d $a0, $sp, 1024 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -2048 +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 2032 +; LA64-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign1024() "no-realign-stack" { +; LA32-LABEL: caller_no_realign1024: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign1024: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(ptr %1) + ret void +} + +define void @caller2048() { +; LA32-LABEL: caller2048: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -2032 +; LA32-NEXT: .cfi_def_cfa_offset 2032 +; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 2024 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 2032 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: addi.w $sp, $sp, -2048 +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: srli.w $a0, $sp, 11 +; LA32-NEXT: slli.w $sp, $a0, 11 +; LA32-NEXT: ori $a0, $zero, 2048 +; LA32-NEXT: add.w $a0, $sp, $a0 +; 
LA32-NEXT: bl %plt(callee) +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: sub.w $sp, $fp, $a0 +; LA32-NEXT: addi.w $sp, $sp, 2032 +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 2032 +; LA32-NEXT: ret +; +; LA64-LABEL: caller2048: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -2032 +; LA64-NEXT: .cfi_def_cfa_offset 2032 +; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 2032 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: addi.d $sp, $sp, -2048 +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: srli.d $a0, $sp, 11 +; LA64-NEXT: slli.d $sp, $a0, 11 +; LA64-NEXT: ori $a0, $zero, 2048 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: sub.d $sp, $fp, $a0 +; LA64-NEXT: addi.d $sp, $sp, 2032 +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 2032 +; LA64-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign2048() "no-realign-stack" { +; LA32-LABEL: caller_no_realign2048: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign2048: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, 
$sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(ptr %1) + ret void +} + +define void @caller4096() { +; LA32-LABEL: caller4096: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -2032 +; LA32-NEXT: .cfi_def_cfa_offset 2032 +; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 2024 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 2032 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: ori $a0, $a0, 2064 +; LA32-NEXT: sub.w $sp, $sp, $a0 +; LA32-NEXT: srli.w $a0, $sp, 12 +; LA32-NEXT: slli.w $sp, $a0, 12 +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: add.w $a0, $sp, $a0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: lu12i.w $a0, 2 +; LA32-NEXT: sub.w $sp, $fp, $a0 +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: ori $a0, $a0, 2064 +; LA32-NEXT: add.w $sp, $sp, $a0 +; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 2032 +; LA32-NEXT: ret +; +; LA64-LABEL: caller4096: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -2032 +; LA64-NEXT: .cfi_def_cfa_offset 2032 +; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 2032 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: ori $a0, $a0, 2064 +; LA64-NEXT: sub.d $sp, $sp, $a0 +; LA64-NEXT: srli.d $a0, $sp, 12 +; LA64-NEXT: slli.d $sp, $a0, 12 +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: lu12i.w $a0, 2 +; LA64-NEXT: sub.d $sp, $fp, $a0 +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: ori $a0, $a0, 2064 +; LA64-NEXT: add.d $sp, $sp, $a0 +; LA64-NEXT: 
ld.d $fp, $sp, 2016 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 2032 +; LA64-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(ptr %1) + ret void +} + +define void @caller_no_realign4096() "no-realign-stack" { +; LA32-LABEL: caller_no_realign4096: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign4096: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(ptr %1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll new file mode 100644 index 0000000000000000000000000000000000000000..5b6998b087318cc2b25d261de2e240f01e5ca9c6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stptr.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +;; Check that stptr.w is not emitted for small offsets. 
+define void @stptr_w_too_small_offset(ptr %p, i32 signext %val) nounwind { +; LA32-LABEL: stptr_w_too_small_offset: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 2044 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_w_too_small_offset: +; LA64: # %bb.0: +; LA64-NEXT: st.w $a1, $a0, 2044 +; LA64-NEXT: ret + %addr = getelementptr inbounds i32, ptr %p, i64 511 + store i32 %val, ptr %addr, align 4 + ret void +} + +;; Check that stptr.w is emitted for applicable offsets. +define void @stptr_w(ptr %p, i32 signext %val) nounwind { +; LA32-LABEL: stptr_w: +; LA32: # %bb.0: +; LA32-NEXT: ori $a2, $zero, 2048 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_w: +; LA64: # %bb.0: +; LA64-NEXT: stptr.w $a1, $a0, 2048 +; LA64-NEXT: ret + %addr = getelementptr inbounds i32, ptr %p, i64 512 + store i32 %val, ptr %addr, align 4 + ret void +} + +;; Check that stptr.w is not emitted for out-of-range offsets. +define void @stptr_w_too_big_offset(ptr %p, i32 signext %val) nounwind { +; LA32-LABEL: stptr_w_too_big_offset: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 8 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_w_too_big_offset: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 8 +; LA64-NEXT: stx.w $a1, $a0, $a2 +; LA64-NEXT: ret + %addr = getelementptr inbounds i32, ptr %p, i64 8192 + store i32 %val, ptr %addr, align 4 + ret void +} + +;; Check that stptr.d is not emitted for small offsets. +define void @stptr_d_too_small_offset(ptr %p, i64 %val) nounwind { +; LA32-LABEL: stptr_d_too_small_offset: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a2, $a0, 2044 +; LA32-NEXT: st.w $a1, $a0, 2040 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_d_too_small_offset: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 2040 +; LA64-NEXT: ret + %addr = getelementptr inbounds i64, ptr %p, i64 255 + store i64 %val, ptr %addr, align 8 + ret void +} + +;; Check that stptr.d is emitted for applicable offsets. 
+define void @stptr_d(ptr %p, i64 %val) nounwind { +; LA32-LABEL: stptr_d: +; LA32: # %bb.0: +; LA32-NEXT: ori $a3, $zero, 2052 +; LA32-NEXT: add.w $a3, $a0, $a3 +; LA32-NEXT: st.w $a2, $a3, 0 +; LA32-NEXT: ori $a2, $zero, 2048 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_d: +; LA64: # %bb.0: +; LA64-NEXT: stptr.d $a1, $a0, 2048 +; LA64-NEXT: ret + %addr = getelementptr inbounds i64, ptr %p, i64 256 + store i64 %val, ptr %addr, align 8 + ret void +} + +;; Check that stptr.d is not emitted for out-of-range offsets. +define void @stptr_d_too_big_offset(ptr %p, i64 %val) nounwind { +; LA32-LABEL: stptr_d_too_big_offset: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a3, 8 +; LA32-NEXT: add.w $a4, $a0, $a3 +; LA32-NEXT: st.w $a1, $a4, 0 +; LA32-NEXT: ori $a1, $a3, 4 +; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: st.w $a2, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: stptr_d_too_big_offset: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a2, 8 +; LA64-NEXT: stx.d $a1, $a0, $a2 +; LA64-NEXT: ret + %addr = getelementptr inbounds i64, ptr %p, i64 4096 + store i64 %val, ptr %addr, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll new file mode 100644 index 0000000000000000000000000000000000000000..ff547631792ef688ea19c59d1ad46ce80c8e9c84 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +;; Perform tail call optimization for global address. +declare i32 @callee_tail(i32 %i) +define i32 @caller_tail(i32 %i) nounwind { +; CHECK-LABEL: caller_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b %plt(callee_tail) +entry: + %r = tail call i32 @callee_tail(i32 %i) + ret i32 %r +} + +;; Perform tail call optimization for external symbol. 
+;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns. +@dest = global [2 x i8] zeroinitializer +declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1) +define void @caller_extern(ptr %src) optsize { +; CHECK-LABEL: caller_extern: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a1, $a0 +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest) +; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest) +; CHECK-NEXT: ori $a2, $zero, 33 +; CHECK-NEXT: b %plt(memcpy) +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false) + ret void +} + +;; Perform indirect tail call optimization (for function pointer call). +declare void @callee_indirect1() +declare void @callee_indirect2() +define void @caller_indirect_tail(i32 %a) nounwind { +; CHECK-LABEL: caller_indirect_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: sltui $a0, $a0, 1 +; CHECK-NEXT: pcalau12i $a1, %got_pc_hi20(callee_indirect2) +; CHECK-NEXT: ld.d $a1, $a1, %got_pc_lo12(callee_indirect2) +; CHECK-NEXT: masknez $a1, $a1, $a0 +; CHECK-NEXT: pcalau12i $a2, %got_pc_hi20(callee_indirect1) +; CHECK-NEXT: ld.d $a2, $a2, %got_pc_lo12(callee_indirect1) +; CHECK-NEXT: maskeqz $a0, $a2, $a0 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: jr $a0 +entry: + %tobool = icmp eq i32 %a, 0 + %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2 + tail call void %callee() + ret void +} + +;; Do not tail call optimize functions with varargs passed by stack. +declare i32 @callee_varargs(i32, ...) 
+define void @caller_varargs(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: caller_varargs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: move $a2, $a1 +; CHECK-NEXT: move $a3, $a0 +; CHECK-NEXT: move $a4, $a0 +; CHECK-NEXT: move $a5, $a1 +; CHECK-NEXT: move $a6, $a1 +; CHECK-NEXT: move $a7, $a0 +; CHECK-NEXT: bl %plt(callee_varargs) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a) + ret void +} + +;; Do not tail call optimize if stack is used to pass parameters. +declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) +define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) nounwind { +; CHECK-LABEL: caller_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $t0, $sp, 16 +; CHECK-NEXT: st.d $t0, $sp, 0 +; CHECK-NEXT: bl %plt(callee_args) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) + ret i32 %r +} + +;; Do not tail call optimize if parameters need to be passed indirectly. 
+declare i32 @callee_indirect_args(i256 %a) +define void @caller_indirect_args() nounwind { +; CHECK-LABEL: caller_indirect_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $zero, $sp, 24 +; CHECK-NEXT: st.d $zero, $sp, 16 +; CHECK-NEXT: st.d $zero, $sp, 8 +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(callee_indirect_args) +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %call = tail call i32 @callee_indirect_args(i256 1) + ret void +} + +;; Do not tail call optimize if byval parameters need to be passed. +declare i32 @callee_byval(ptr byval(ptr) %a) +define i32 @caller_byval() nounwind { +; CHECK-LABEL: caller_byval: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $a0, $sp, 16 +; CHECK-NEXT: st.d $a0, $sp, 8 +; CHECK-NEXT: addi.d $a0, $sp, 8 +; CHECK-NEXT: bl %plt(callee_byval) +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret +entry: + %a = alloca ptr + %r = tail call i32 @callee_byval(ptr byval(ptr) %a) + ret i32 %r +} + +;; Do not tail call optimize if callee uses structret semantics. 
+%struct.A = type { i32 } +@a = global %struct.A zeroinitializer + +declare void @callee_struct(ptr sret(%struct.A) %a) +define void @caller_nostruct() nounwind { +; CHECK-LABEL: caller_nostruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(a) +; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(a) +; CHECK-NEXT: bl %plt(callee_struct) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + tail call void @callee_struct(ptr sret(%struct.A) @a) + ret void +} + +;; Do not tail call optimize if caller uses structret semantics. +declare void @callee_nostruct() +define void @caller_struct(ptr sret(%struct.A) %a) nounwind { +; CHECK-LABEL: caller_struct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_nostruct) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + tail call void @callee_nostruct() + ret void +} + +;; Do not tail call optimize if disabled. 
+define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" { +; CHECK-LABEL: disable_tail_calls: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(callee_tail) +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %rv = tail call i32 @callee_tail(i32 %i) + ret i32 %rv +} diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll new file mode 100644 index 0000000000000000000000000000000000000000..1d5ed089c69fa12c887ca2b428927576e0df4f08 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll @@ -0,0 +1,74 @@ +;; Check that an unknown --target-abi is ignored and the triple-implied ABI is +;; used. +; RUN: llc --mtriple=loongarch32-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=ILP32D,UNKNOWN +; RUN: llc --mtriple=loongarch64-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=LP64D,UNKNOWN + +; UNKNOWN: 'foo' is not a recognized ABI for this target, ignoring and using triple-implied ABI + +;; Check that --target-abi takes precedence over triple-supplied ABI modifiers. 
+; RUN: llc --mtriple=loongarch32-linux-gnusf --target-abi=ilp32d --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=ILP32D,CONFLICT-ILP32D +; RUN: llc --mtriple=loongarch64-linux-gnusf --target-abi=lp64d --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=LP64D,CONFLICT-LP64D + +; CONFLICT-ILP32D: warning: triple-implied ABI conflicts with provided target-abi 'ilp32d', using target-abi +; CONFLICT-LP64D: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi + +;; Check that no warning is reported when there is no environment component in +;; triple-supplied ABI modifiers and --target-abi is used. +; RUN: llc --mtriple=loongarch64-linux --target-abi=lp64d --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=LP64D,NO-WARNING + +; NO-WARNING-NOT: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi + +;; Check that ILP32-on-LA64 and LP64-on-LA32 combinations are handled properly. +; RUN: llc --mtriple=loongarch64 --target-abi=ilp32d --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=LP64D,32ON64 +; RUN: llc --mtriple=loongarch32 --target-abi=lp64d --mattr=+d < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=ILP32D,64ON32 + +; 32ON64: 32-bit ABIs are not supported for 64-bit targets, ignoring target-abi and using triple-implied ABI +; 64ON32: 64-bit ABIs are not supported for 32-bit targets, ignoring target-abi and using triple-implied ABI + +define float @f(float %a) { +; ILP32D-LABEL: f: +; ILP32D: # %bb.0: +; ILP32D-NEXT: addi.w $a0, $zero, 1 +; ILP32D-NEXT: movgr2fr.w $fa1, $a0 +; ILP32D-NEXT: ffint.s.w $fa1, $fa1 +; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; ILP32D-NEXT: ret +; +; LP64D-LABEL: f: +; LP64D: # %bb.0: +; LP64D-NEXT: addi.w $a0, $zero, 1 +; LP64D-NEXT: movgr2fr.w $fa1, $a0 +; LP64D-NEXT: ffint.s.w $fa1, $fa1 +; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LP64D-NEXT: ret + %1 = fadd float %a, 1.0 + ret float %1 +} + +define double @g(double 
%a) { +; ILP32D-LABEL: g: +; ILP32D: # %bb.0: +; ILP32D-NEXT: addi.w $a0, $zero, 1 +; ILP32D-NEXT: movgr2fr.w $fa1, $a0 +; ILP32D-NEXT: ffint.s.w $fa1, $fa1 +; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 +; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 +; ILP32D-NEXT: ret +; +; LP64D-LABEL: g: +; LP64D: # %bb.0: +; LP64D-NEXT: addi.d $a0, $zero, 1 +; LP64D-NEXT: movgr2fr.d $fa1, $a0 +; LP64D-NEXT: ffint.d.l $fa1, $fa1 +; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LP64D-NEXT: ret + %1 = fadd double %a, 1.0 + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll new file mode 100644 index 0000000000000000000000000000000000000000..0aca339038860484d9a88efeaaa6f2b31d3b31ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +;; Check that the correct ABI is chosen based on the triple given. +;; TODO: enable the S and F ABIs once support is wired up. 
+; RUN: llc --mtriple=loongarch32-linux-gnuf64 --mattr=+d < %s \ +; RUN: | FileCheck %s --check-prefix=ILP32D +; RUN: llc --mtriple=loongarch64-linux-gnuf64 --mattr=+d < %s \ +; RUN: | FileCheck %s --check-prefix=LP64D + +define float @f(float %a) { +; ILP32D-LABEL: f: +; ILP32D: # %bb.0: +; ILP32D-NEXT: addi.w $a0, $zero, 1 +; ILP32D-NEXT: movgr2fr.w $fa1, $a0 +; ILP32D-NEXT: ffint.s.w $fa1, $fa1 +; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; ILP32D-NEXT: ret +; +; LP64D-LABEL: f: +; LP64D: # %bb.0: +; LP64D-NEXT: addi.w $a0, $zero, 1 +; LP64D-NEXT: movgr2fr.w $fa1, $a0 +; LP64D-NEXT: ffint.s.w $fa1, $fa1 +; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LP64D-NEXT: ret + %1 = fadd float %a, 1.0 + ret float %1 +} + +define double @g(double %a) { +; ILP32D-LABEL: g: +; ILP32D: # %bb.0: +; ILP32D-NEXT: addi.w $a0, $zero, 1 +; ILP32D-NEXT: movgr2fr.w $fa1, $a0 +; ILP32D-NEXT: ffint.s.w $fa1, $fa1 +; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 +; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 +; ILP32D-NEXT: ret +; +; LP64D-LABEL: g: +; LP64D: # %bb.0: +; LP64D-NEXT: addi.d $a0, $zero, 1 +; LP64D-NEXT: movgr2fr.d $fa1, $a0 +; LP64D-NEXT: ffint.d.l $fa1, $fa1 +; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LP64D-NEXT: ret + %1 = fadd double %a, 1.0 + ret double %1 +} diff --git a/llvm/test/CodeGen/LoongArch/thread-pointer.ll b/llvm/test/CodeGen/LoongArch/thread-pointer.ll new file mode 100644 index 0000000000000000000000000000000000000000..805709e61c541129908f8ef6cc741870e8762478 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/thread-pointer.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s --mtriple=loongarch32 | FileCheck %s +; RUN: llc < %s --mtriple=loongarch64 | FileCheck %s + +declare ptr @llvm.thread.pointer() + +define ptr @thread_pointer() nounwind { +; CHECK-LABEL: thread_pointer: +; CHECK: # %bb.0: +; CHECK-NEXT: move $a0, $tp +; CHECK-NEXT: ret + %1 = tail call ptr @llvm.thread.pointer() + ret ptr %1 +} diff --git 
a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll new file mode 100644 index 0000000000000000000000000000000000000000..d973cd45da0121f6869b50507c1ca2c423d4bbdb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -0,0 +1,166 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC +; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64NOPIC + +;; Check that TLS symbols are lowered correctly based on the specified +;; model. Make sure they're external to avoid them all being optimised to Local +;; Exec for the executable. + +@unspecified = external thread_local global i32 +@ld = external thread_local(localdynamic) global i32 +@ie = external thread_local(initialexec) global i32 +@le = external thread_local(localexec) global i32 + +;; No model specified (global dynamic) + +define ptr @f1() nounwind { +; LA32PIC-LABEL: f1: +; LA32PIC: # %bb.0: # %entry +; LA32PIC-NEXT: addi.w $sp, $sp, -16 +; LA32PIC-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32PIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) +; LA32PIC-NEXT: addi.w $a0, $a0, %got_pc_lo12(unspecified) +; LA32PIC-NEXT: bl %plt(__tls_get_addr) +; LA32PIC-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32PIC-NEXT: addi.w $sp, $sp, 16 +; LA32PIC-NEXT: ret +; +; LA64PIC-LABEL: f1: +; LA64PIC: # %bb.0: # %entry +; LA64PIC-NEXT: addi.d $sp, $sp, -16 +; LA64PIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64PIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) +; LA64PIC-NEXT: addi.d $a0, $a0, %got_pc_lo12(unspecified) +; LA64PIC-NEXT: bl %plt(__tls_get_addr) +; LA64PIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64PIC-NEXT: addi.d $sp, $sp, 
16 +; LA64PIC-NEXT: ret +; +; LA32NOPIC-LABEL: f1: +; LA32NOPIC: # %bb.0: # %entry +; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) +; LA32NOPIC-NEXT: ld.w $a0, $a0, %ie_pc_lo12(unspecified) +; LA32NOPIC-NEXT: add.w $a0, $a0, $tp +; LA32NOPIC-NEXT: ret +; +; LA64NOPIC-LABEL: f1: +; LA64NOPIC: # %bb.0: # %entry +; LA64NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) +; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(unspecified) +; LA64NOPIC-NEXT: add.d $a0, $a0, $tp +; LA64NOPIC-NEXT: ret +entry: + ret ptr @unspecified +} + +;; localdynamic specified + +define ptr @f2() nounwind { +; LA32PIC-LABEL: f2: +; LA32PIC: # %bb.0: # %entry +; LA32PIC-NEXT: addi.w $sp, $sp, -16 +; LA32PIC-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32PIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) +; LA32PIC-NEXT: addi.w $a0, $a0, %got_pc_lo12(ld) +; LA32PIC-NEXT: bl %plt(__tls_get_addr) +; LA32PIC-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32PIC-NEXT: addi.w $sp, $sp, 16 +; LA32PIC-NEXT: ret +; +; LA64PIC-LABEL: f2: +; LA64PIC: # %bb.0: # %entry +; LA64PIC-NEXT: addi.d $sp, $sp, -16 +; LA64PIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64PIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) +; LA64PIC-NEXT: addi.d $a0, $a0, %got_pc_lo12(ld) +; LA64PIC-NEXT: bl %plt(__tls_get_addr) +; LA64PIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64PIC-NEXT: addi.d $sp, $sp, 16 +; LA64PIC-NEXT: ret +; +; LA32NOPIC-LABEL: f2: +; LA32NOPIC: # %bb.0: # %entry +; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LA32NOPIC-NEXT: ld.w $a0, $a0, %ie_pc_lo12(ld) +; LA32NOPIC-NEXT: add.w $a0, $a0, $tp +; LA32NOPIC-NEXT: ret +; +; LA64NOPIC-LABEL: f2: +; LA64NOPIC: # %bb.0: # %entry +; LA64NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) +; LA64NOPIC-NEXT: add.d $a0, $a0, $tp +; LA64NOPIC-NEXT: ret +entry: + ret ptr @ld +} + +;; initialexec specified + +define ptr @f3() nounwind { +; LA32PIC-LABEL: f3: +; LA32PIC: # %bb.0: # %entry +; LA32PIC-NEXT: 
pcalau12i $a0, %ie_pc_hi20(ie) +; LA32PIC-NEXT: ld.w $a0, $a0, %ie_pc_lo12(ie) +; LA32PIC-NEXT: add.w $a0, $a0, $tp +; LA32PIC-NEXT: ret +; +; LA64PIC-LABEL: f3: +; LA64PIC: # %bb.0: # %entry +; LA64PIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LA64PIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; LA64PIC-NEXT: add.d $a0, $a0, $tp +; LA64PIC-NEXT: ret +; +; LA32NOPIC-LABEL: f3: +; LA32NOPIC: # %bb.0: # %entry +; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LA32NOPIC-NEXT: ld.w $a0, $a0, %ie_pc_lo12(ie) +; LA32NOPIC-NEXT: add.w $a0, $a0, $tp +; LA32NOPIC-NEXT: ret +; +; LA64NOPIC-LABEL: f3: +; LA64NOPIC: # %bb.0: # %entry +; LA64NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; LA64NOPIC-NEXT: add.d $a0, $a0, $tp +; LA64NOPIC-NEXT: ret +entry: + ret ptr @ie +} + +;; localexec specified + +define ptr @f4() nounwind { +; LA32PIC-LABEL: f4: +; LA32PIC: # %bb.0: # %entry +; LA32PIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA32PIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA32PIC-NEXT: add.w $a0, $a0, $tp +; LA32PIC-NEXT: ret +; +; LA64PIC-LABEL: f4: +; LA64PIC: # %bb.0: # %entry +; LA64PIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64PIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64PIC-NEXT: add.d $a0, $a0, $tp +; LA64PIC-NEXT: ret +; +; LA32NOPIC-LABEL: f4: +; LA32NOPIC: # %bb.0: # %entry +; LA32NOPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA32NOPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA32NOPIC-NEXT: add.w $a0, $a0, $tp +; LA32NOPIC-NEXT: ret +; +; LA64NOPIC-LABEL: f4: +; LA64NOPIC: # %bb.0: # %entry +; LA64NOPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64NOPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64NOPIC-NEXT: add.d $a0, $a0, $tp +; LA64NOPIC-NEXT: ret +entry: + ret ptr @le +} diff --git a/llvm/test/CodeGen/LoongArch/trap.ll b/llvm/test/CodeGen/LoongArch/trap.ll new file mode 100644 index 0000000000000000000000000000000000000000..718b99160b20122ba07ed5de0aa940eaa37b7024 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/trap.ll @@ -0,0 +1,26 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +;; Verify that we lower @llvm.trap() and @llvm.debugtrap() correctly. + +declare void @llvm.trap() +declare void @llvm.debugtrap() + +define void @test_trap() nounwind { +; CHECK-LABEL: test_trap: +; CHECK: # %bb.0: +; CHECK-NEXT: amswap.w $zero, $ra, $zero +; CHECK-NEXT: ret + tail call void @llvm.trap() + ret void +} + +define void @test_debugtrap() nounwind { +; CHECK-LABEL: test_debugtrap: +; CHECK: # %bb.0: +; CHECK-NEXT: break 0 +; CHECK-NEXT: ret + tail call void @llvm.debugtrap() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll new file mode 100644 index 0000000000000000000000000000000000000000..871c17f06e968d2e158a524cde01ff2dbddd98c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 + +;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll. 
+ +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED +; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED +; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED + +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED +; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED +; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED + +define i32 @f0(ptr %p) nounwind { +; LA32-ALIGNED-LABEL: f0: +; LA32-ALIGNED: # %bb.0: +; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0 +; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2 +; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16 +; LA32-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA32-ALIGNED-NEXT: ret +; +; LA32-UNALIGNED-LABEL: f0: +; LA32-UNALIGNED: # %bb.0: +; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0 +; LA32-UNALIGNED-NEXT: ret +; +; LA64-UNALIGNED-LABEL: f0: +; LA64-UNALIGNED: # %bb.0: +; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0 +; LA64-UNALIGNED-NEXT: ret +; +; LA64-ALIGNED-LABEL: f0: +; LA64-ALIGNED: # %bb.0: +; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0 +; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2 +; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16 +; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA64-ALIGNED-NEXT: ret + %tmp = load i32, ptr %p, align 2 + ret i32 %tmp +} + +define i64 @f1(ptr %p) nounwind { +; LA32-ALIGNED-LABEL: f1: +; LA32-ALIGNED: # %bb.0: +; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0 +; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4 +; LA32-ALIGNED-NEXT: move $a0, $a2 +; LA32-ALIGNED-NEXT: ret +; +; LA32-UNALIGNED-LABEL: f1: +; LA32-UNALIGNED: # %bb.0: +; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0 +; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4 +; LA32-UNALIGNED-NEXT: move $a0, $a2 +; LA32-UNALIGNED-NEXT: ret +; +; LA64-UNALIGNED-LABEL: f1: +; LA64-UNALIGNED: # %bb.0: +; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0 +; LA64-UNALIGNED-NEXT: ret +; +; LA64-ALIGNED-LABEL: f1: +; LA64-ALIGNED: # 
%bb.0: +; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0 +; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4 +; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32 +; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA64-ALIGNED-NEXT: ret + %tmp = load i64, ptr %p, align 4 + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll new file mode 100644 index 0000000000000000000000000000000000000000..37afe7e3ed2acae73ac109a4aa9dff81415e3e56 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 + +;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll. + +; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64 + +;; Small (16 bytes here) unaligned memcpy() should be a function call if +;; ual is turned off. 
+define void @t0(ptr %out, ptr %in) { +; LA32-LABEL: t0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl %plt(memcpy) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: t0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: bl %plt(memcpy) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false) + ret void +} + +;; Small (16 bytes here) aligned memcpy() should be inlined even if +;; ual is turned off. +define void @t1(ptr align 8 %out, ptr align 8 %in) { +; LA32-LABEL: t1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $a0, 12 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $a0, 8 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $a0, 4 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: t1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a2, $a1, 8 +; LA64-NEXT: st.d $a2, $a0, 8 +; LA64-NEXT: ld.d $a1, $a1, 0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false) + ret void +} + +;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized +;; loads and stores if ual is turned off. 
+define void @t2(ptr %out, ptr %in) { +; LA32-LABEL: t2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $a2, $a1, 3 +; LA32-NEXT: st.b $a2, $a0, 3 +; LA32-NEXT: ld.b $a2, $a1, 2 +; LA32-NEXT: st.b $a2, $a0, 2 +; LA32-NEXT: ld.b $a2, $a1, 1 +; LA32-NEXT: st.b $a2, $a0, 1 +; LA32-NEXT: ld.b $a1, $a1, 0 +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: t2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.b $a2, $a1, 3 +; LA64-NEXT: st.b $a2, $a0, 3 +; LA64-NEXT: ld.b $a2, $a1, 2 +; LA64-NEXT: st.b $a2, $a0, 2 +; LA64-NEXT: ld.b $a2, $a1, 1 +; LA64-NEXT: st.b $a2, $a0, 1 +; LA64-NEXT: ld.b $a1, $a1, 0 +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll new file mode 100644 index 0000000000000000000000000000000000000000..90881e2aa4cbd89503d8f6d170c64e3295256afb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/vararg.ll @@ -0,0 +1,356 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ +; RUN: | FileCheck --check-prefix=LA64-FPELIM %s +; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ +; RUN: --frame-pointer=all < %s \ +; RUN: | FileCheck --check-prefix=LA64-WITHFP %s + +declare void @llvm.va_start(ptr) +declare void @llvm.va_end(ptr) + +declare void @notdead(ptr) + +define i64 @va1(ptr %fmt, ...) 
{ +; LA64-FPELIM-LABEL: va1: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80 +; LA64-FPELIM-NEXT: .cfi_def_cfa_offset 80 +; LA64-FPELIM-NEXT: move $a0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $sp, 72 +; LA64-FPELIM-NEXT: st.d $a6, $sp, 64 +; LA64-FPELIM-NEXT: st.d $a5, $sp, 56 +; LA64-FPELIM-NEXT: st.d $a4, $sp, 48 +; LA64-FPELIM-NEXT: st.d $a3, $sp, 40 +; LA64-FPELIM-NEXT: st.d $a2, $sp, 32 +; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32 +; LA64-FPELIM-NEXT: st.d $a1, $sp, 8 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: va1: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: .cfi_def_cfa_offset 96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: .cfi_offset 1, -72 +; LA64-WITHFP-NEXT: .cfi_offset 22, -80 +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: .cfi_def_cfa 22, 64 +; LA64-WITHFP-NEXT: move $a0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, $fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a1, $fp, -24 +; LA64-WITHFP-NEXT: st.d $a0, $fp, 8 +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: ret + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 8 + %argp.next = getelementptr inbounds i64, ptr %argp.cur, i32 1 + store ptr %argp.next, ptr %va, align 8 + %1 = load i64, ptr %argp.cur, align 8 + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +define i64 @va1_va_arg(ptr %fmt, ...) 
nounwind { +; LA64-FPELIM-LABEL: va1_va_arg: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80 +; LA64-FPELIM-NEXT: move $a0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $sp, 72 +; LA64-FPELIM-NEXT: st.d $a6, $sp, 64 +; LA64-FPELIM-NEXT: st.d $a5, $sp, 56 +; LA64-FPELIM-NEXT: st.d $a4, $sp, 48 +; LA64-FPELIM-NEXT: st.d $a3, $sp, 40 +; LA64-FPELIM-NEXT: st.d $a2, $sp, 32 +; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32 +; LA64-FPELIM-NEXT: st.d $a1, $sp, 8 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: va1_va_arg: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: move $a0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, $fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a1, $fp, -24 +; LA64-WITHFP-NEXT: st.d $a0, $fp, 8 +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: ret + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i64 + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +;; Ensure the adjustment when restoring the stack pointer using the frame +;; pointer is correct + +define i64 @va1_va_arg_alloca(ptr %fmt, ...) 
nounwind { +; LA64-FPELIM-LABEL: va1_va_arg_alloca: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -96 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: addi.d $fp, $sp, 32 +; LA64-FPELIM-NEXT: move $s0, $a1 +; LA64-FPELIM-NEXT: st.d $a7, $fp, 56 +; LA64-FPELIM-NEXT: st.d $a6, $fp, 48 +; LA64-FPELIM-NEXT: st.d $a5, $fp, 40 +; LA64-FPELIM-NEXT: st.d $a4, $fp, 32 +; LA64-FPELIM-NEXT: st.d $a3, $fp, 24 +; LA64-FPELIM-NEXT: st.d $a2, $fp, 16 +; LA64-FPELIM-NEXT: addi.d $a0, $fp, 16 +; LA64-FPELIM-NEXT: st.d $a0, $fp, -32 +; LA64-FPELIM-NEXT: addi.d $a0, $a1, 15 +; LA64-FPELIM-NEXT: addi.w $a1, $zero, -16 +; LA64-FPELIM-NEXT: and $a0, $a0, $a1 +; LA64-FPELIM-NEXT: st.d $s0, $fp, 8 +; LA64-FPELIM-NEXT: sub.d $a0, $sp, $a0 +; LA64-FPELIM-NEXT: move $sp, $a0 +; LA64-FPELIM-NEXT: bl %plt(notdead) +; LA64-FPELIM-NEXT: move $a0, $s0 +; LA64-FPELIM-NEXT: addi.d $sp, $fp, -32 +; LA64-FPELIM-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 96 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: va1_va_arg_alloca: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32 +; LA64-WITHFP-NEXT: move $s0, $a1 +; LA64-WITHFP-NEXT: st.d $a7, $fp, 56 +; LA64-WITHFP-NEXT: st.d $a6, $fp, 48 +; LA64-WITHFP-NEXT: st.d $a5, $fp, 40 +; LA64-WITHFP-NEXT: st.d $a4, $fp, 32 +; LA64-WITHFP-NEXT: st.d $a3, $fp, 24 +; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 +; LA64-WITHFP-NEXT: addi.d $a0, $fp, 16 +; LA64-WITHFP-NEXT: st.d $a0, $fp, -32 +; 
LA64-WITHFP-NEXT: addi.d $a0, $a1, 15 +; LA64-WITHFP-NEXT: addi.w $a1, $zero, -16 +; LA64-WITHFP-NEXT: and $a0, $a0, $a1 +; LA64-WITHFP-NEXT: st.d $s0, $fp, 8 +; LA64-WITHFP-NEXT: sub.d $a0, $sp, $a0 +; LA64-WITHFP-NEXT: move $sp, $a0 +; LA64-WITHFP-NEXT: bl %plt(notdead) +; LA64-WITHFP-NEXT: move $a0, $s0 +; LA64-WITHFP-NEXT: addi.d $sp, $fp, -32 +; LA64-WITHFP-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96 +; LA64-WITHFP-NEXT: ret + %va = alloca ptr, align 8 + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i64 + %2 = alloca i8, i64 %1 + call void @notdead(ptr %2) + call void @llvm.va_end(ptr %va) + ret i64 %1 +} + +define void @va1_caller() nounwind { +; LA64-FPELIM-LABEL: va1_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-FPELIM-NEXT: ori $a2, $zero, 2 +; LA64-FPELIM-NEXT: bl %plt(va1) +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: va1_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16 +; LA64-WITHFP-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-WITHFP-NEXT: ori $a2, $zero, 2 +; LA64-WITHFP-NEXT: bl %plt(va1) +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16 +; LA64-WITHFP-NEXT: ret + %1 = call i64 (ptr, ...) 
@va1(ptr undef, double 1.0, i64 2) + ret void +} + +;; Ensure a named 2*GRLen argument is passed in a1 and a2, while the +;; vararg long double is passed in a4 and a5 (rather than a3 and a4) + +declare i64 @va_aligned_register(i64 %a, i128 %b, ...) + +define void @va_aligned_register_caller() nounwind { +; LA64-FPELIM-LABEL: va_aligned_register_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: lu12i.w $a0, 335544 +; LA64-FPELIM-NEXT: ori $a0, $a0, 1311 +; LA64-FPELIM-NEXT: lu32i.d $a0, 335544 +; LA64-FPELIM-NEXT: lu52i.d $a4, $a0, -328 +; LA64-FPELIM-NEXT: lu12i.w $a0, -503317 +; LA64-FPELIM-NEXT: ori $a0, $a0, 2129 +; LA64-FPELIM-NEXT: lu32i.d $a0, 37355 +; LA64-FPELIM-NEXT: lu52i.d $a5, $a0, 1024 +; LA64-FPELIM-NEXT: ori $a0, $zero, 2 +; LA64-FPELIM-NEXT: ori $a1, $zero, 1111 +; LA64-FPELIM-NEXT: move $a2, $zero +; LA64-FPELIM-NEXT: bl %plt(va_aligned_register) +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: va_aligned_register_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16 +; LA64-WITHFP-NEXT: lu12i.w $a0, 335544 +; LA64-WITHFP-NEXT: ori $a0, $a0, 1311 +; LA64-WITHFP-NEXT: lu32i.d $a0, 335544 +; LA64-WITHFP-NEXT: lu52i.d $a4, $a0, -328 +; LA64-WITHFP-NEXT: lu12i.w $a0, -503317 +; LA64-WITHFP-NEXT: ori $a0, $a0, 2129 +; LA64-WITHFP-NEXT: lu32i.d $a0, 37355 +; LA64-WITHFP-NEXT: lu52i.d $a5, $a0, 1024 +; LA64-WITHFP-NEXT: ori $a0, $zero, 2 +; LA64-WITHFP-NEXT: ori $a1, $zero, 1111 +; LA64-WITHFP-NEXT: move $a2, $zero +; LA64-WITHFP-NEXT: bl %plt(va_aligned_register) +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte 
Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16 +; LA64-WITHFP-NEXT: ret + %1 = call i64 (i64, i128, ...) @va_aligned_register(i64 2, i128 1111, + fp128 0xLEB851EB851EB851F400091EB851EB851) + ret void +} + +;; Check 2*GRLen values are aligned appropriately when passed on the stack +;; in a vararg call + +declare i32 @va_aligned_stack_callee(i32, ...) + +define void @va_aligned_stack_caller() nounwind { +; LA64-FPELIM-LABEL: va_aligned_stack_caller: +; LA64-FPELIM: # %bb.0: +; LA64-FPELIM-NEXT: addi.d $sp, $sp, -112 +; LA64-FPELIM-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-FPELIM-NEXT: ori $a0, $zero, 17 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 48 +; LA64-FPELIM-NEXT: ori $a0, $zero, 16 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 40 +; LA64-FPELIM-NEXT: ori $a0, $zero, 15 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 32 +; LA64-FPELIM-NEXT: ori $a0, $zero, 14 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 0 +; LA64-FPELIM-NEXT: lu12i.w $a0, -503317 +; LA64-FPELIM-NEXT: ori $a0, $a0, 2129 +; LA64-FPELIM-NEXT: lu32i.d $a0, 37355 +; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 24 +; LA64-FPELIM-NEXT: lu12i.w $a0, 335544 +; LA64-FPELIM-NEXT: ori $a0, $a0, 1311 +; LA64-FPELIM-NEXT: lu32i.d $a0, 335544 +; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, -328 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 16 +; LA64-FPELIM-NEXT: ori $a0, $zero, 1000 +; LA64-FPELIM-NEXT: st.d $a0, $sp, 64 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 88 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 80 +; LA64-FPELIM-NEXT: st.d $zero, $sp, 72 +; LA64-FPELIM-NEXT: ori $a1, $zero, 11 +; LA64-FPELIM-NEXT: addi.d $a2, $sp, 64 +; LA64-FPELIM-NEXT: ori $a3, $zero, 12 +; LA64-FPELIM-NEXT: ori $a4, $zero, 13 +; LA64-FPELIM-NEXT: ori $a0, $zero, 1 +; LA64-FPELIM-NEXT: move $a6, $zero +; LA64-FPELIM-NEXT: move $a7, $a0 +; LA64-FPELIM-NEXT: bl %plt(va_aligned_stack_callee) +; LA64-FPELIM-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-FPELIM-NEXT: addi.d $sp, $sp, 112 +; LA64-FPELIM-NEXT: ret +; +; LA64-WITHFP-LABEL: 
va_aligned_stack_caller: +; LA64-WITHFP: # %bb.0: +; LA64-WITHFP-NEXT: addi.d $sp, $sp, -112 +; LA64-WITHFP-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill +; LA64-WITHFP-NEXT: addi.d $fp, $sp, 112 +; LA64-WITHFP-NEXT: ori $a0, $zero, 17 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 48 +; LA64-WITHFP-NEXT: ori $a0, $zero, 16 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 40 +; LA64-WITHFP-NEXT: ori $a0, $zero, 15 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 32 +; LA64-WITHFP-NEXT: ori $a0, $zero, 14 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 0 +; LA64-WITHFP-NEXT: lu12i.w $a0, -503317 +; LA64-WITHFP-NEXT: ori $a0, $a0, 2129 +; LA64-WITHFP-NEXT: lu32i.d $a0, 37355 +; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, 1024 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 24 +; LA64-WITHFP-NEXT: lu12i.w $a0, 335544 +; LA64-WITHFP-NEXT: ori $a0, $a0, 1311 +; LA64-WITHFP-NEXT: lu32i.d $a0, 335544 +; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, -328 +; LA64-WITHFP-NEXT: st.d $a0, $sp, 16 +; LA64-WITHFP-NEXT: ori $a0, $zero, 1000 +; LA64-WITHFP-NEXT: st.d $a0, $fp, -48 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -24 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -32 +; LA64-WITHFP-NEXT: st.d $zero, $fp, -40 +; LA64-WITHFP-NEXT: ori $a1, $zero, 11 +; LA64-WITHFP-NEXT: addi.d $a2, $fp, -48 +; LA64-WITHFP-NEXT: ori $a3, $zero, 12 +; LA64-WITHFP-NEXT: ori $a4, $zero, 13 +; LA64-WITHFP-NEXT: ori $a0, $zero, 1 +; LA64-WITHFP-NEXT: move $a6, $zero +; LA64-WITHFP-NEXT: move $a7, $a0 +; LA64-WITHFP-NEXT: bl %plt(va_aligned_stack_callee) +; LA64-WITHFP-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-WITHFP-NEXT: addi.d $sp, $sp, 112 +; LA64-WITHFP-NEXT: ret + %1 = call i32 (i32, ...) 
@va_aligned_stack_callee(i32 1, i32 11, + i256 1000, i32 12, i32 13, i128 18446744073709551616, i32 14, + fp128 0xLEB851EB851EB851F400091EB851EB851, i64 15, + [2 x i64] [i64 16, i64 17]) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8009866d3953e19bc30e9ca0c1d6bde2b4951ccd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll @@ -0,0 +1,895 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +;; TODO: Merge the offset of address calculation into the offset field of instructions. 
+ +%f2 = type <2 x float> +%f4 = type <4 x float> +%f8 = type <8 x float> +%d2 = type <2 x double> +%d4 = type <4 x double> +%d8 = type <8 x double> + +define void @test_zero(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_zero: +; LA32F: # %bb.0: +; LA32F-NEXT: fld.s $fa0, $a0, 12 +; LA32F-NEXT: movgr2fr.w $fa1, $zero +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 12 +; LA32F-NEXT: fld.s $fa0, $a0, 8 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 8 +; LA32F-NEXT: fld.s $fa0, $a0, 4 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 4 +; LA32F-NEXT: fld.s $fa0, $a0, 0 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_zero: +; LA32D: # %bb.0: +; LA32D-NEXT: fld.s $fa0, $a0, 12 +; LA32D-NEXT: movgr2fr.w $fa1, $zero +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 12 +; LA32D-NEXT: fld.s $fa0, $a0, 8 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 8 +; LA32D-NEXT: fld.s $fa0, $a0, 4 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 4 +; LA32D-NEXT: fld.s $fa0, $a0, 0 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_zero: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 12 +; LA64F-NEXT: movgr2fr.w $fa1, $zero +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 12 +; LA64F-NEXT: fld.s $fa0, $a0, 8 +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 8 +; LA64F-NEXT: fld.s $fa0, $a0, 4 +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 4 +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_zero: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 12 +; LA64D-NEXT: movgr2fr.w $fa1, $zero +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 
12 +; LA64D-NEXT: fld.s $fa0, $a0, 8 +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 8 +; LA64D-NEXT: fld.s $fa0, $a0, 4 +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 4 +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %f4, ptr %P + %R = fadd %f4 %p, zeroinitializer + store %f4 %R, ptr %S + ret void +} + +define void @test_f2(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_f2: +; LA32F: # %bb.0: +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI1_0) +; LA32F-NEXT: fld.s $fa0, $a2, 0 +; LA32F-NEXT: fld.s $fa1, $a0, 4 +; LA32F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA32F-NEXT: fst.s $fa0, $a1, 4 +; LA32F-NEXT: fld.s $fa0, $a0, 0 +; LA32F-NEXT: addi.w $a0, $zero, 1 +; LA32F-NEXT: movgr2fr.w $fa1, $a0 +; LA32F-NEXT: ffint.s.w $fa1, $fa1 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_f2: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI1_0) +; LA32D-NEXT: fld.s $fa0, $a2, 0 +; LA32D-NEXT: fld.s $fa1, $a0, 4 +; LA32D-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA32D-NEXT: fst.s $fa0, $a1, 4 +; LA32D-NEXT: fld.s $fa0, $a0, 0 +; LA32D-NEXT: addi.w $a0, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa1, $a0 +; LA32D-NEXT: ffint.s.w $fa1, $fa1 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_f2: +; LA64F: # %bb.0: +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI1_0) +; LA64F-NEXT: fld.s $fa0, $a2, 0 +; LA64F-NEXT: fld.s $fa1, $a0, 4 +; LA64F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64F-NEXT: fst.s $fa0, $a1, 4 +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a0, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a0 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: fadd.s $fa0, $fa0, 
$fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_f2: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI1_0) +; LA64D-NEXT: fld.s $fa0, $a2, 0 +; LA64D-NEXT: fld.s $fa1, $a0, 4 +; LA64D-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64D-NEXT: fst.s $fa0, $a1, 4 +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a0 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %f2, ptr %P + %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 > + store %f2 %R, ptr %S + ret void +} + +define void @test_f4(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_f4: +; LA32F: # %bb.0: +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_0) +; LA32F-NEXT: fld.s $fa0, $a2, 0 +; LA32F-NEXT: fld.s $fa1, $a0, 4 +; LA32F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_1) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_1) +; LA32F-NEXT: fld.s $fa1, $a2, 0 +; LA32F-NEXT: fld.s $fa2, $a0, 8 +; LA32F-NEXT: fadd.s $fa1, $fa2, $fa1 +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_2) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_2) +; LA32F-NEXT: fld.s $fa2, $a2, 0 +; LA32F-NEXT: fld.s $fa3, $a0, 12 +; LA32F-NEXT: fadd.s $fa2, $fa3, $fa2 +; LA32F-NEXT: fst.s $fa2, $a1, 12 +; LA32F-NEXT: fst.s $fa1, $a1, 8 +; LA32F-NEXT: fst.s $fa0, $a1, 4 +; LA32F-NEXT: fld.s $fa0, $a0, 0 +; LA32F-NEXT: addi.w $a0, $zero, 1 +; LA32F-NEXT: movgr2fr.w $fa1, $a0 +; LA32F-NEXT: ffint.s.w $fa1, $fa1 +; LA32F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32F-NEXT: fst.s $fa0, $a1, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_f4: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_0) +; LA32D-NEXT: fld.s $fa0, $a2, 0 +; LA32D-NEXT: fld.s $fa1, $a0, 4 +; LA32D-NEXT: fadd.s 
$fa0, $fa1, $fa0 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_1) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_1) +; LA32D-NEXT: fld.s $fa1, $a2, 0 +; LA32D-NEXT: fld.s $fa2, $a0, 8 +; LA32D-NEXT: fadd.s $fa1, $fa2, $fa1 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_2) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI2_2) +; LA32D-NEXT: fld.s $fa2, $a2, 0 +; LA32D-NEXT: fld.s $fa3, $a0, 12 +; LA32D-NEXT: fadd.s $fa2, $fa3, $fa2 +; LA32D-NEXT: fst.s $fa2, $a1, 12 +; LA32D-NEXT: fst.s $fa1, $a1, 8 +; LA32D-NEXT: fst.s $fa0, $a1, 4 +; LA32D-NEXT: fld.s $fa0, $a0, 0 +; LA32D-NEXT: addi.w $a0, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa1, $a0 +; LA32D-NEXT: ffint.s.w $fa1, $fa1 +; LA32D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.s $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_f4: +; LA64F: # %bb.0: +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_0) +; LA64F-NEXT: fld.s $fa0, $a2, 0 +; LA64F-NEXT: fld.s $fa1, $a0, 4 +; LA64F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_1) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_1) +; LA64F-NEXT: fld.s $fa1, $a2, 0 +; LA64F-NEXT: fld.s $fa2, $a0, 8 +; LA64F-NEXT: fadd.s $fa1, $fa2, $fa1 +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_2) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_2) +; LA64F-NEXT: fld.s $fa2, $a2, 0 +; LA64F-NEXT: fld.s $fa3, $a0, 12 +; LA64F-NEXT: fadd.s $fa2, $fa3, $fa2 +; LA64F-NEXT: fst.s $fa2, $a1, 12 +; LA64F-NEXT: fst.s $fa1, $a1, 8 +; LA64F-NEXT: fst.s $fa0, $a1, 4 +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a0, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a0 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64F-NEXT: fst.s $fa0, $a1, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_f4: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_0) +; LA64D-NEXT: fld.s $fa0, $a2, 0 +; LA64D-NEXT: fld.s $fa1, $a0, 4 +; 
LA64D-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_1) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_1) +; LA64D-NEXT: fld.s $fa1, $a2, 0 +; LA64D-NEXT: fld.s $fa2, $a0, 8 +; LA64D-NEXT: fadd.s $fa1, $fa2, $fa1 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_2) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI2_2) +; LA64D-NEXT: fld.s $fa2, $a2, 0 +; LA64D-NEXT: fld.s $fa3, $a0, 12 +; LA64D-NEXT: fadd.s $fa2, $fa3, $fa2 +; LA64D-NEXT: fst.s $fa2, $a1, 12 +; LA64D-NEXT: fst.s $fa1, $a1, 8 +; LA64D-NEXT: fst.s $fa0, $a1, 4 +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a0 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: fadd.s $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.s $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %f4, ptr %P + %R = fadd %f4 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + store %f4 %R, ptr %S + ret void +} + +define void @test_f8(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_f8: +; LA32F: # %bb.0: +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_0) +; LA32F-NEXT: fld.s $fa0, $a2, 0 +; LA32F-NEXT: fld.s $fa1, $a0, 4 +; LA32F-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA32F-NEXT: fld.s $fa2, $a0, 20 +; LA32F-NEXT: fadd.s $fa0, $fa2, $fa0 +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_1) +; LA32F-NEXT: fld.s $fa2, $a2, 0 +; LA32F-NEXT: fld.s $fa3, $a0, 8 +; LA32F-NEXT: fadd.s $fa3, $fa3, $fa2 +; LA32F-NEXT: fld.s $fa4, $a0, 24 +; LA32F-NEXT: fadd.s $fa2, $fa4, $fa2 +; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) +; LA32F-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_2) +; LA32F-NEXT: fld.s $fa4, $a2, 0 +; LA32F-NEXT: fld.s $fa5, $a0, 12 +; LA32F-NEXT: fadd.s $fa5, $fa5, $fa4 +; LA32F-NEXT: fld.s $fa6, $a0, 28 +; LA32F-NEXT: fadd.s $fa4, $fa6, $fa4 +; LA32F-NEXT: fst.s $fa4, $a1, 28 +; LA32F-NEXT: fst.s $fa2, $a1, 24 +; LA32F-NEXT: fst.s $fa0, $a1, 20 +; 
LA32F-NEXT: fst.s $fa5, $a1, 12 +; LA32F-NEXT: fst.s $fa3, $a1, 8 +; LA32F-NEXT: fst.s $fa1, $a1, 4 +; LA32F-NEXT: addi.w $a2, $zero, 1 +; LA32F-NEXT: movgr2fr.w $fa0, $a2 +; LA32F-NEXT: ffint.s.w $fa0, $fa0 +; LA32F-NEXT: fld.s $fa1, $a0, 16 +; LA32F-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA32F-NEXT: fst.s $fa1, $a1, 16 +; LA32F-NEXT: fld.s $fa1, $a0, 0 +; LA32F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA32F-NEXT: fst.s $fa0, $a1, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_f8: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_0) +; LA32D-NEXT: fld.s $fa0, $a2, 0 +; LA32D-NEXT: fld.s $fa1, $a0, 4 +; LA32D-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA32D-NEXT: fld.s $fa2, $a0, 20 +; LA32D-NEXT: fadd.s $fa0, $fa2, $fa0 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_1) +; LA32D-NEXT: fld.s $fa2, $a2, 0 +; LA32D-NEXT: fld.s $fa3, $a0, 8 +; LA32D-NEXT: fadd.s $fa3, $fa3, $fa2 +; LA32D-NEXT: fld.s $fa4, $a0, 24 +; LA32D-NEXT: fadd.s $fa2, $fa4, $fa2 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI3_2) +; LA32D-NEXT: fld.s $fa4, $a2, 0 +; LA32D-NEXT: fld.s $fa5, $a0, 12 +; LA32D-NEXT: fadd.s $fa5, $fa5, $fa4 +; LA32D-NEXT: fld.s $fa6, $a0, 28 +; LA32D-NEXT: fadd.s $fa4, $fa6, $fa4 +; LA32D-NEXT: fst.s $fa4, $a1, 28 +; LA32D-NEXT: fst.s $fa2, $a1, 24 +; LA32D-NEXT: fst.s $fa0, $a1, 20 +; LA32D-NEXT: fst.s $fa5, $a1, 12 +; LA32D-NEXT: fst.s $fa3, $a1, 8 +; LA32D-NEXT: fst.s $fa1, $a1, 4 +; LA32D-NEXT: addi.w $a2, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa0, $a2 +; LA32D-NEXT: ffint.s.w $fa0, $fa0 +; LA32D-NEXT: fld.s $fa1, $a0, 16 +; LA32D-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA32D-NEXT: fst.s $fa1, $a1, 16 +; LA32D-NEXT: fld.s $fa1, $a0, 0 +; LA32D-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA32D-NEXT: fst.s $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_f8: +; LA64F: # %bb.0: +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA64F-NEXT: 
addi.d $a2, $a2, %pc_lo12(.LCPI3_0) +; LA64F-NEXT: fld.s $fa0, $a2, 0 +; LA64F-NEXT: fld.s $fa1, $a0, 4 +; LA64F-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA64F-NEXT: fld.s $fa2, $a0, 20 +; LA64F-NEXT: fadd.s $fa0, $fa2, $fa0 +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI3_1) +; LA64F-NEXT: fld.s $fa2, $a2, 0 +; LA64F-NEXT: fld.s $fa3, $a0, 8 +; LA64F-NEXT: fadd.s $fa3, $fa3, $fa2 +; LA64F-NEXT: fld.s $fa4, $a0, 24 +; LA64F-NEXT: fadd.s $fa2, $fa4, $fa2 +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI3_2) +; LA64F-NEXT: fld.s $fa4, $a2, 0 +; LA64F-NEXT: fld.s $fa5, $a0, 12 +; LA64F-NEXT: fadd.s $fa5, $fa5, $fa4 +; LA64F-NEXT: fld.s $fa6, $a0, 28 +; LA64F-NEXT: fadd.s $fa4, $fa6, $fa4 +; LA64F-NEXT: fst.s $fa4, $a1, 28 +; LA64F-NEXT: fst.s $fa2, $a1, 24 +; LA64F-NEXT: fst.s $fa0, $a1, 20 +; LA64F-NEXT: fst.s $fa5, $a1, 12 +; LA64F-NEXT: fst.s $fa3, $a1, 8 +; LA64F-NEXT: fst.s $fa1, $a1, 4 +; LA64F-NEXT: addi.w $a2, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa0, $a2 +; LA64F-NEXT: ffint.s.w $fa0, $fa0 +; LA64F-NEXT: fld.s $fa1, $a0, 16 +; LA64F-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA64F-NEXT: fst.s $fa1, $a1, 16 +; LA64F-NEXT: fld.s $fa1, $a0, 0 +; LA64F-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64F-NEXT: fst.s $fa0, $a1, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_f8: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI3_0) +; LA64D-NEXT: fld.s $fa0, $a2, 0 +; LA64D-NEXT: fld.s $fa1, $a0, 4 +; LA64D-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA64D-NEXT: fld.s $fa2, $a0, 20 +; LA64D-NEXT: fadd.s $fa0, $fa2, $fa0 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI3_1) +; LA64D-NEXT: fld.s $fa2, $a2, 0 +; LA64D-NEXT: fld.s $fa3, $a0, 8 +; LA64D-NEXT: fadd.s $fa3, $fa3, $fa2 +; LA64D-NEXT: fld.s $fa4, $a0, 24 +; LA64D-NEXT: fadd.s $fa2, $fa4, $fa2 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) +; 
LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI3_2) +; LA64D-NEXT: fld.s $fa4, $a2, 0 +; LA64D-NEXT: fld.s $fa5, $a0, 12 +; LA64D-NEXT: fadd.s $fa5, $fa5, $fa4 +; LA64D-NEXT: fld.s $fa6, $a0, 28 +; LA64D-NEXT: fadd.s $fa4, $fa6, $fa4 +; LA64D-NEXT: fst.s $fa4, $a1, 28 +; LA64D-NEXT: fst.s $fa2, $a1, 24 +; LA64D-NEXT: fst.s $fa0, $a1, 20 +; LA64D-NEXT: fst.s $fa5, $a1, 12 +; LA64D-NEXT: fst.s $fa3, $a1, 8 +; LA64D-NEXT: fst.s $fa1, $a1, 4 +; LA64D-NEXT: addi.w $a2, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa0, $a2 +; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: fld.s $fa1, $a0, 16 +; LA64D-NEXT: fadd.s $fa1, $fa1, $fa0 +; LA64D-NEXT: fst.s $fa1, $a1, 16 +; LA64D-NEXT: fld.s $fa1, $a0, 0 +; LA64D-NEXT: fadd.s $fa0, $fa1, $fa0 +; LA64D-NEXT: fst.s $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %f8, ptr %P + %R = fadd %f8 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + store %f8 %R, ptr %S + ret void +} + +define void @test_d2(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_d2: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a1 +; LA32F-NEXT: move $s0, $a0 +; LA32F-NEXT: ld.w $a0, $a0, 8 +; LA32F-NEXT: ld.w $a1, $s0, 12 +; LA32F-NEXT: lu12i.w $a3, 262144 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 8 +; LA32F-NEXT: st.w $a1, $fp, 12 +; LA32F-NEXT: ld.w $a0, $s0, 0 +; LA32F-NEXT: ld.w $a1, $s0, 4 +; LA32F-NEXT: lu12i.w $a3, 261888 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: ld.w $s0, $sp, 4 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload 
+; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_d2: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI4_0) +; LA32D-NEXT: fld.d $fa0, $a2, 0 +; LA32D-NEXT: fld.d $fa1, $a0, 8 +; LA32D-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA32D-NEXT: fst.d $fa0, $a1, 8 +; LA32D-NEXT: fld.d $fa0, $a0, 0 +; LA32D-NEXT: addi.w $a0, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa1, $a0 +; LA32D-NEXT: ffint.s.w $fa1, $fa1 +; LA32D-NEXT: fcvt.d.s $fa1, $fa1 +; LA32D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.d $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_d2: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -32 +; LA64F-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a1 +; LA64F-NEXT: move $s0, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1024 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 8 +; LA64F-NEXT: ld.d $a0, $s0, 0 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 0 +; LA64F-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 32 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_d2: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI4_0) +; LA64D-NEXT: fld.d $fa0, $a2, 0 +; LA64D-NEXT: fld.d $fa1, $a0, 8 +; LA64D-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA64D-NEXT: fst.d $fa0, $a1, 8 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fa1, $fa1 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %d2, ptr %P + %R = fadd %d2 %p, < double 1.000000e+00, 
double 2.000000e+00 > + store %d2 %R, ptr %S + ret void +} + +define void @test_d4(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_d4: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a1 +; LA32F-NEXT: move $s0, $a0 +; LA32F-NEXT: ld.w $a0, $a0, 24 +; LA32F-NEXT: ld.w $a1, $s0, 28 +; LA32F-NEXT: lu12i.w $a3, 262400 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 24 +; LA32F-NEXT: st.w $a1, $fp, 28 +; LA32F-NEXT: ld.w $a0, $s0, 16 +; LA32F-NEXT: ld.w $a1, $s0, 20 +; LA32F-NEXT: lu12i.w $a3, 262272 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 16 +; LA32F-NEXT: st.w $a1, $fp, 20 +; LA32F-NEXT: ld.w $a0, $s0, 8 +; LA32F-NEXT: ld.w $a1, $s0, 12 +; LA32F-NEXT: lu12i.w $a3, 262144 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 8 +; LA32F-NEXT: st.w $a1, $fp, 12 +; LA32F-NEXT: ld.w $a0, $s0, 0 +; LA32F-NEXT: ld.w $a1, $s0, 4 +; LA32F-NEXT: lu12i.w $a3, 261888 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: ld.w $s0, $sp, 4 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_d4: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI5_0) +; LA32D-NEXT: fld.d $fa0, $a2, 0 +; LA32D-NEXT: fld.d $fa1, $a0, 8 +; LA32D-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI5_1) +; LA32D-NEXT: fld.d $fa1, $a2, 0 +; LA32D-NEXT: fld.d $fa2, $a0, 16 +; LA32D-NEXT: fadd.d $fa1, 
$fa2, $fa1 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI5_2) +; LA32D-NEXT: fld.d $fa2, $a2, 0 +; LA32D-NEXT: fld.d $fa3, $a0, 24 +; LA32D-NEXT: fadd.d $fa2, $fa3, $fa2 +; LA32D-NEXT: fst.d $fa2, $a1, 24 +; LA32D-NEXT: fst.d $fa1, $a1, 16 +; LA32D-NEXT: fst.d $fa0, $a1, 8 +; LA32D-NEXT: fld.d $fa0, $a0, 0 +; LA32D-NEXT: addi.w $a0, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa1, $a0 +; LA32D-NEXT: ffint.s.w $fa1, $fa1 +; LA32D-NEXT: fcvt.d.s $fa1, $fa1 +; LA32D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA32D-NEXT: fst.d $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_d4: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -32 +; LA64F-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a1 +; LA64F-NEXT: move $s0, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 24 +; LA64F-NEXT: lu52i.d $a1, $zero, 1025 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 24 +; LA64F-NEXT: ld.d $a0, $s0, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1024 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 8 +; LA64F-NEXT: ld.d $a0, $s0, 0 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 0 +; LA64F-NEXT: ld.d $a0, $s0, 16 +; LA64F-NEXT: ori $a1, $zero, 0 +; LA64F-NEXT: lu32i.d $a1, -524288 +; LA64F-NEXT: lu52i.d $a1, $a1, 1024 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 16 +; LA64F-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 32 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_d4: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI5_0) +; LA64D-NEXT: fld.d $fa0, $a2, 0 +; LA64D-NEXT: fld.d $fa1, $a0, 8 +; LA64D-NEXT: fadd.d $fa0, $fa1, $fa0 +; 
LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI5_1) +; LA64D-NEXT: fld.d $fa1, $a2, 0 +; LA64D-NEXT: fld.d $fa2, $a0, 16 +; LA64D-NEXT: fadd.d $fa1, $fa2, $fa1 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI5_2) +; LA64D-NEXT: fld.d $fa2, $a2, 0 +; LA64D-NEXT: fld.d $fa3, $a0, 24 +; LA64D-NEXT: fadd.d $fa2, $fa3, $fa2 +; LA64D-NEXT: fst.d $fa2, $a1, 24 +; LA64D-NEXT: fst.d $fa1, $a1, 16 +; LA64D-NEXT: fst.d $fa0, $a1, 8 +; LA64D-NEXT: fld.d $fa0, $a0, 0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fa1, $fa1 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %d4, ptr %P + %R = fadd %d4 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 > + store %d4 %R, ptr %S + ret void +} + +define void @test_d8(ptr %P, ptr %S) nounwind { +; LA32F-LABEL: test_d8: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -32 +; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32F-NEXT: move $fp, $a1 +; LA32F-NEXT: move $s0, $a0 +; LA32F-NEXT: ld.w $a0, $a0, 56 +; LA32F-NEXT: ld.w $a1, $s0, 60 +; LA32F-NEXT: lu12i.w $s1, 262400 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s1 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 56 +; LA32F-NEXT: st.w $a1, $fp, 60 +; LA32F-NEXT: ld.w $a0, $s0, 48 +; LA32F-NEXT: ld.w $a1, $s0, 52 +; LA32F-NEXT: lu12i.w $s2, 262272 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 48 +; LA32F-NEXT: st.w 
$a1, $fp, 52 +; LA32F-NEXT: ld.w $a0, $s0, 40 +; LA32F-NEXT: ld.w $a1, $s0, 44 +; LA32F-NEXT: lu12i.w $s3, 262144 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s3 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 40 +; LA32F-NEXT: st.w $a1, $fp, 44 +; LA32F-NEXT: ld.w $a0, $s0, 32 +; LA32F-NEXT: ld.w $a1, $s0, 36 +; LA32F-NEXT: lu12i.w $s4, 261888 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s4 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 32 +; LA32F-NEXT: st.w $a1, $fp, 36 +; LA32F-NEXT: ld.w $a0, $s0, 24 +; LA32F-NEXT: ld.w $a1, $s0, 28 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s1 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 24 +; LA32F-NEXT: st.w $a1, $fp, 28 +; LA32F-NEXT: ld.w $a0, $s0, 16 +; LA32F-NEXT: ld.w $a1, $s0, 20 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s2 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 16 +; LA32F-NEXT: st.w $a1, $fp, 20 +; LA32F-NEXT: ld.w $a0, $s0, 8 +; LA32F-NEXT: ld.w $a1, $s0, 12 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s3 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 8 +; LA32F-NEXT: st.w $a1, $fp, 12 +; LA32F-NEXT: ld.w $a0, $s0, 0 +; LA32F-NEXT: ld.w $a1, $s0, 4 +; LA32F-NEXT: move $a2, $zero +; LA32F-NEXT: move $a3, $s4 +; LA32F-NEXT: bl %plt(__adddf3) +; LA32F-NEXT: st.w $a0, $fp, 0 +; LA32F-NEXT: st.w $a1, $fp, 4 +; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 32 +; LA32F-NEXT: ret +; +; LA32D-LABEL: test_d8: +; LA32D: # %bb.0: +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0) +; LA32D-NEXT: addi.w $a2, $a2, 
%pc_lo12(.LCPI6_0) +; LA32D-NEXT: fld.d $fa0, $a2, 0 +; LA32D-NEXT: fld.d $fa1, $a0, 8 +; LA32D-NEXT: fadd.d $fa1, $fa1, $fa0 +; LA32D-NEXT: fld.d $fa2, $a0, 40 +; LA32D-NEXT: fadd.d $fa0, $fa2, $fa0 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_1) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI6_1) +; LA32D-NEXT: fld.d $fa2, $a2, 0 +; LA32D-NEXT: fld.d $fa3, $a0, 16 +; LA32D-NEXT: fadd.d $fa3, $fa3, $fa2 +; LA32D-NEXT: fld.d $fa4, $a0, 48 +; LA32D-NEXT: fadd.d $fa2, $fa4, $fa2 +; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_2) +; LA32D-NEXT: addi.w $a2, $a2, %pc_lo12(.LCPI6_2) +; LA32D-NEXT: fld.d $fa4, $a2, 0 +; LA32D-NEXT: fld.d $fa5, $a0, 24 +; LA32D-NEXT: fadd.d $fa5, $fa5, $fa4 +; LA32D-NEXT: fld.d $fa6, $a0, 56 +; LA32D-NEXT: fadd.d $fa4, $fa6, $fa4 +; LA32D-NEXT: fst.d $fa4, $a1, 56 +; LA32D-NEXT: fst.d $fa2, $a1, 48 +; LA32D-NEXT: fst.d $fa0, $a1, 40 +; LA32D-NEXT: fst.d $fa5, $a1, 24 +; LA32D-NEXT: fst.d $fa3, $a1, 16 +; LA32D-NEXT: fst.d $fa1, $a1, 8 +; LA32D-NEXT: addi.w $a2, $zero, 1 +; LA32D-NEXT: movgr2fr.w $fa0, $a2 +; LA32D-NEXT: ffint.s.w $fa0, $fa0 +; LA32D-NEXT: fcvt.d.s $fa0, $fa0 +; LA32D-NEXT: fld.d $fa1, $a0, 32 +; LA32D-NEXT: fadd.d $fa1, $fa1, $fa0 +; LA32D-NEXT: fst.d $fa1, $a1, 32 +; LA32D-NEXT: fld.d $fa1, $a0, 0 +; LA32D-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA32D-NEXT: fst.d $fa0, $a1, 0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: test_d8: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -48 +; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 0 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a1 +; LA64F-NEXT: move $s0, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 56 +; LA64F-NEXT: lu52i.d $s1, $zero, 1025 +; LA64F-NEXT: move $a1, $s1 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 56 +; 
LA64F-NEXT: ld.d $a0, $s0, 40 +; LA64F-NEXT: lu52i.d $s2, $zero, 1024 +; LA64F-NEXT: move $a1, $s2 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 40 +; LA64F-NEXT: ld.d $a0, $s0, 32 +; LA64F-NEXT: lu52i.d $s3, $zero, 1023 +; LA64F-NEXT: move $a1, $s3 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 32 +; LA64F-NEXT: ld.d $a0, $s0, 24 +; LA64F-NEXT: move $a1, $s1 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 24 +; LA64F-NEXT: ld.d $a0, $s0, 8 +; LA64F-NEXT: move $a1, $s2 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 8 +; LA64F-NEXT: ld.d $a0, $s0, 0 +; LA64F-NEXT: move $a1, $s3 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 0 +; LA64F-NEXT: ori $a0, $zero, 0 +; LA64F-NEXT: lu32i.d $a0, -524288 +; LA64F-NEXT: lu52i.d $s1, $a0, 1024 +; LA64F-NEXT: ld.d $a0, $s0, 48 +; LA64F-NEXT: move $a1, $s1 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 48 +; LA64F-NEXT: ld.d $a0, $s0, 16 +; LA64F-NEXT: move $a1, $s1 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $fp, 16 +; LA64F-NEXT: ld.d $s3, $sp, 0 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 48 +; LA64F-NEXT: ret +; +; LA64D-LABEL: test_d8: +; LA64D: # %bb.0: +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI6_0) +; LA64D-NEXT: fld.d $fa0, $a2, 0 +; LA64D-NEXT: fld.d $fa1, $a0, 8 +; LA64D-NEXT: fadd.d $fa1, $fa1, $fa0 +; LA64D-NEXT: fld.d $fa2, $a0, 40 +; LA64D-NEXT: fadd.d $fa0, $fa2, $fa0 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_1) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI6_1) +; LA64D-NEXT: fld.d $fa2, $a2, 0 +; LA64D-NEXT: fld.d $fa3, $a0, 16 +; LA64D-NEXT: fadd.d $fa3, $fa3, $fa2 
+; LA64D-NEXT: fld.d $fa4, $a0, 48 +; LA64D-NEXT: fadd.d $fa2, $fa4, $fa2 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_2) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI6_2) +; LA64D-NEXT: fld.d $fa4, $a2, 0 +; LA64D-NEXT: fld.d $fa5, $a0, 24 +; LA64D-NEXT: fadd.d $fa5, $fa5, $fa4 +; LA64D-NEXT: fld.d $fa6, $a0, 56 +; LA64D-NEXT: fadd.d $fa4, $fa6, $fa4 +; LA64D-NEXT: fst.d $fa4, $a1, 56 +; LA64D-NEXT: fst.d $fa2, $a1, 48 +; LA64D-NEXT: fst.d $fa0, $a1, 40 +; LA64D-NEXT: fst.d $fa5, $a1, 24 +; LA64D-NEXT: fst.d $fa3, $a1, 16 +; LA64D-NEXT: fst.d $fa1, $a1, 8 +; LA64D-NEXT: addi.d $a2, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fa0, $a2 +; LA64D-NEXT: ffint.d.l $fa0, $fa0 +; LA64D-NEXT: fld.d $fa1, $a0, 32 +; LA64D-NEXT: fadd.d $fa1, $fa1, $fa0 +; LA64D-NEXT: fst.d $fa1, $a1, 32 +; LA64D-NEXT: fld.d $fa1, $a0, 0 +; LA64D-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA64D-NEXT: fst.d $fa0, $a1, 0 +; LA64D-NEXT: ret + %p = load %d8, ptr %P + %R = fadd %d8 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 > + store %d8 %R, ptr %S + ret void +} diff --git a/llvm/test/CodeGen/X86/callbr-asm-kill.mir b/llvm/test/CodeGen/X86/callbr-asm-kill.mir index 6a38d620984e9cae886b486069a84f62873219c7..227d8e65105c01e0315e99bf6772eb8a8b08606c 100644 --- a/llvm/test/CodeGen/X86/callbr-asm-kill.mir +++ b/llvm/test/CodeGen/X86/callbr-asm-kill.mir @@ -67,7 +67,7 @@ body: | ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @foo, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY [[MOV64rm]] - ; CHECK-NEXT: INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 196654 /* mem:m */, killed [[MOV64rm]], 1, $noreg, 0, $noreg, 13 /* imm */, blockaddress(@test1, 
%ir-block.loop) + ; CHECK-NEXT: INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 262190 /* mem:m */, killed [[MOV64rm]], 1, $noreg, 0, $noreg, 13 /* imm */, blockaddress(@test1, %ir-block.loop) ; CHECK-NEXT: JMP_1 %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.end: @@ -87,7 +87,7 @@ body: | $rdi = COPY killed %0 CALL64pcrel32 target-flags(x86-plt) @foo, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 196654 /* mem:m */, %1, 1, $noreg, 0, $noreg, 13 /* imm */, blockaddress(@test1, %ir-block.loop) + INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 262190 /* mem:m */, %1, 1, $noreg, 0, $noreg, 13 /* imm */, blockaddress(@test1, %ir-block.loop) JMP_1 %bb.2 bb.2.end: diff --git a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll b/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll index 8f8d404b0e568a86e82817fe071c3a5107d522cd..80e278e51194b7e0b89fd3eb7068f09c440db315 100644 --- a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll +++ b/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll @@ -4,7 +4,7 @@ ; powerpc64 (and on x86_64 at at least -O2). Presumably this is a SelectionDAG ; issue. ; FIXME: arm64 is an alias for aarch64 on macs, apparently? -; XFAIL: powerpc64, aarch64, arm64, hexagon, riscv, sparc +; XFAIL: powerpc64, aarch64, arm64, hexagon, riscv, sparc, loongarch ; Build from the following source with clang -O2. 
diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll new file mode 100644 index 0000000000000000000000000000000000000000..8ff055f13ad15df1cebcaa1b15e01f2e7ffe6abd --- /dev/null +++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s + +;; Check this won't result in crash. +define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { + %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) + %6 = add nsw <8 x i32> %1, + call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 + %7 = bitcast i8 %2 to <8 x i1> + %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 + %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) + %10 = bitcast i8 %3 to <8 x i1> + %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> + ret <8 x i32> %11 +} + +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) +!1 = !DIFile(filename: "a.cpp", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) +!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) +!6 = !DINamespace(name: "ns1", scope: null) +!7 = !DISubroutineType(types: !8) +!8 = !{null} +!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!10 = !{!4} +!11 = 
!DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) +!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) +!13 = !{!14} +!14 = !DISubrange(count: 4) +!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) +!16 = !DILocation(line: 18, scope: !17) +!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) diff --git a/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch32_relocations.s b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch32_relocations.s new file mode 100644 index 0000000000000000000000000000000000000000..23f6acc307b98f57df16ca4e17718e2e2cf60ec7 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch32_relocations.s @@ -0,0 +1,113 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc --triple=loongarch32 --filetype=obj -o %t/elf_reloc.o %s +# RUN: llvm-jitlink --noexec \ +# RUN: --abs external_data=0xdeadbeef \ +# RUN: --abs external_func=0xcafef00d \ +# RUN: --check %s %t/elf_reloc.o + .text + + .globl main + .p2align 2 + .type main,@function +main: + ret + + .size main, .-main + +## Check R_LARCH_B26 relocation of a local function call. + +# jitlink-check: decode_operand(local_func_call26, 0)[27:0] = \ +# jitlink-check: (local_func - local_func_call26)[27:0] +# jitlink-check: decode_operand(local_func_jump26, 0)[27:0] = \ +# jitlink-check: (local_func - local_func_jump26)[27:0] + .globl local_func + .p2align 2 + .type local_func,@function +local_func: + ret + .size local_func, .-local_func + + .globl local_func_call26 + .p2align 2 +local_func_call26: + bl local_func + .size local_func_call26, .-local_func_call26 + + .globl local_func_jump26 + .p2align 2 +local_func_jump26: + b local_func + .size local_func_jump26, .-local_func_jump26 + +## Check R_LARCH_PCALA_HI20 / R_LARCH_PCALA_LO12 relocation of a local symbol. 
+ +# jitlink-check: decode_operand(test_pcalau12i_pcrel, 1)[19:0] = \ +# jitlink-check: (named_data - test_pcalau12i_pcrel)[31:12] + \ +# jitlink-check: named_data[11:11] +# jitlink-check: decode_operand(test_addi_pcrel_lo12, 2)[11:0] = \ +# jitlink-check: (named_data)[11:0] + .globl test_pcalau12i_pcrel + .p2align 2 +test_pcalau12i_pcrel: + pcalau12i $a0, %pc_hi20(named_data) + .size test_pcalau12i_pcrel, .-test_pcalau12i_pcrel + + .globl test_addi_pcrel_lo12 + .p2align 2 +test_addi_pcrel_lo12: + addi.w $a0, $a0, %pc_lo12(named_data) + .size test_addi_pcrel_lo12, .-test_addi_pcrel_lo12 + +## Check that calls/jumps to external functions trigger the generation of stubs +## and GOT entries. + +# jitlink-check: *{4}(got_addr(elf_reloc.o, external_func)) = external_func +# jitlink-check: decode_operand(test_external_call, 0) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_func) - \ +# jitlink-check: test_external_call)[27:0] +# jitlink-check: decode_operand(test_external_jump, 0) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_func) - \ +# jitlink-check: test_external_jump)[27:0] + .globl test_external_call + .p2align 2 +test_external_call: + bl external_func + .size test_external_call, .-test_external_call + + .globl test_external_jump + .p2align 2 +test_external_jump: + b external_func + .size test_external_jump, .-test_external_jump + +## Check R_LARCH_GOT_PC_HI20 / R_LARCH_GOT_PC_LO12 handling with a reference to +## an external symbol. Validate both the reference to the GOT entry, and also +## the content of the GOT entry. 
+ +# jitlink-check: *{4}(got_addr(elf_reloc.o, external_data)) = external_data +# jitlink-check: decode_operand(test_gotpage_external, 1)[19:0] = \ +# jitlink-check: (got_addr(elf_reloc.o, external_data)[31:12] - \ +# jitlink-check: test_gotpage_external[31:12] + \ +# jitlink-check: got_addr(elf_reloc.o, external_data)[11:11])[19:0] +# jitlink-check: decode_operand(test_gotoffset12_external, 2)[11:0] = \ +# jitlink-check: got_addr(elf_reloc.o, external_data)[11:0] + .globl test_gotpage_external + .p2align 2 +test_gotpage_external: + pcalau12i $a0, %got_pc_hi20(external_data) + .size test_gotpage_external, .-test_gotpage_external + + .globl test_gotoffset12_external + .p2align 2 +test_gotoffset12_external: + ld.w $a0, $a0, %got_pc_lo12(external_data) + .size test_gotoffset12_external, .-test_gotoffset12_external + + + .globl named_data + .p2align 4 + .type named_data,@object +named_data: + .quad 0x2222222222222222 + .quad 0x3333333333333333 + .size named_data, .-named_data diff --git a/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_ehframe.s b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_ehframe.s new file mode 100644 index 0000000000000000000000000000000000000000..4a1a1649508eb627c760432b38649a3bdc06293c --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_ehframe.s @@ -0,0 +1,72 @@ +# REQUIRES: asserts +# RUN: llvm-mc --triple=loongarch64-linux-gnu --filetype=obj -o %t %s +# RUN: llvm-jitlink --noexec --phony-externals --debug-only=jitlink %t 2>&1 | \ +# RUN: FileCheck %s + +## Check that splitting of eh-frame sections works. + +# CHECK: DWARFRecordSectionSplitter: Processing .eh_frame... +# CHECK: Processing block at +# CHECK: Processing CFI record at +# CHECK: Extracted {{.*}} section = .eh_frame +# CHECK: Processing CFI record at +# CHECK: Extracted {{.*}} section = .eh_frame +# CHECK: EHFrameEdgeFixer: Processing .eh_frame in "{{.*}}"... 
+# CHECK: Processing block at +# CHECK: Processing CFI record at +# CHECK: Record is CIE +# CHECK: Processing block at +# CHECK: Processing CFI record at +# CHECK: Record is FDE +# CHECK: Adding edge at {{.*}} to CIE at: {{.*}} +# CHECK: Existing edge at {{.*}} to PC begin at {{.*}} +# CHECK: Adding keep-alive edge from target at {{.*}} to FDE at {{.*}} +# CHECK: Processing block at +# CHECK: Processing CFI record at +# CHECK: Record is FDE +# CHECK: Adding edge at {{.*}} to CIE at: {{.*}} +# CHECK: Existing edge at {{.*}} to PC begin at {{.*}} +# CHECK: Adding keep-alive edge from target at {{.*}} to FDE at {{.*}} + + .text + .globl main + .p2align 2 + .type main,@function +main: + .cfi_startproc + addi.d $sp, $sp, -16 + .cfi_def_cfa_offset 16 + st.d $ra, $sp, 8 + .cfi_offset 1, -8 + ori $a0, $zero, 4 + bl %plt(__cxa_allocate_exception) + ori $a1, $zero, 5 + st.w $a1, $a0, 0 + pcalau12i $a1, %got_pc_hi20(_ZTIi) + ld.d $a1, $a1, %got_pc_lo12(_ZTIi) + move $a2, $zero + bl %plt(__cxa_throw) +.main_end: + .size main, .main_end-main + .cfi_endproc + + .globl dup + .p2align 2 + .type main,@function +dup: + .cfi_startproc + addi.d $sp, $sp, -16 + .cfi_def_cfa_offset 16 + st.d $ra, $sp, 8 + .cfi_offset 1, -8 + ori $a0, $zero, 4 + bl %plt(__cxa_allocate_exception) + ori $a1, $zero, 5 + st.w $a1, $a0, 0 + pcalau12i $a1, %got_pc_hi20(_ZTIi) + ld.d $a1, $a1, %got_pc_lo12(_ZTIi) + move $a2, $zero + bl %plt(__cxa_throw) +.dup_end: + .size main, .dup_end-dup + .cfi_endproc diff --git a/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_relocations.s b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_relocations.s new file mode 100644 index 0000000000000000000000000000000000000000..74eb8118d10e3cb664c3a400582803540826ea65 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/LoongArch/ELF_loongarch64_relocations.s @@ -0,0 +1,113 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc --triple=loongarch64 --filetype=obj -o %t/elf_reloc.o %s +# RUN: llvm-jitlink 
--noexec \ +# RUN: --abs external_data=0xdeadbeef \ +# RUN: --abs external_func=0xcafef00d \ +# RUN: --check %s %t/elf_reloc.o + .text + + .globl main + .p2align 2 + .type main,@function +main: + ret + + .size main, .-main + +## Check R_LARCH_B26 relocation of a local function call. + +# jitlink-check: decode_operand(local_func_call26, 0)[27:0] = \ +# jitlink-check: (local_func - local_func_call26)[27:0] +# jitlink-check: decode_operand(local_func_jump26, 0)[27:0] = \ +# jitlink-check: (local_func - local_func_jump26)[27:0] + .globl local_func + .p2align 2 + .type local_func,@function +local_func: + ret + .size local_func, .-local_func + + .globl local_func_call26 + .p2align 2 +local_func_call26: + bl local_func + .size local_func_call26, .-local_func_call26 + + .globl local_func_jump26 + .p2align 2 +local_func_jump26: + b local_func + .size local_func_jump26, .-local_func_jump26 + +## Check R_LARCH_PCALA_HI20 / R_LARCH_PCALA_LO12 relocation of a local symbol. + +# jitlink-check: decode_operand(test_pcalau12i_pcrel, 1)[19:0] = \ +# jitlink-check: (named_data - test_pcalau12i_pcrel)[31:12] + \ +# jitlink-check: named_data[11:11] +# jitlink-check: decode_operand(test_addi_pcrel_lo12, 2)[11:0] = \ +# jitlink-check: (named_data)[11:0] + .globl test_pcalau12i_pcrel + .p2align 2 +test_pcalau12i_pcrel: + pcalau12i $a0, %pc_hi20(named_data) + .size test_pcalau12i_pcrel, .-test_pcalau12i_pcrel + + .globl test_addi_pcrel_lo12 + .p2align 2 +test_addi_pcrel_lo12: + addi.d $a0, $a0, %pc_lo12(named_data) + .size test_addi_pcrel_lo12, .-test_addi_pcrel_lo12 + +## Check that calls/jumps to external functions trigger the generation of stubs +## and GOT entries. 
+ +# jitlink-check: *{8}(got_addr(elf_reloc.o, external_func)) = external_func +# jitlink-check: decode_operand(test_external_call, 0) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_func) - \ +# jitlink-check: test_external_call)[27:0] +# jitlink-check: decode_operand(test_external_jump, 0) = \ +# jitlink-check: (stub_addr(elf_reloc.o, external_func) - \ +# jitlink-check: test_external_jump)[27:0] + .globl test_external_call + .p2align 2 +test_external_call: + bl external_func + .size test_external_call, .-test_external_call + + .globl test_external_jump + .p2align 2 +test_external_jump: + b external_func + .size test_external_jump, .-test_external_jump + +## Check R_LARCH_GOT_PC_HI20 / R_LARCH_GOT_PC_LO12 handling with a reference to +## an external symbol. Validate both the reference to the GOT entry, and also +## the content of the GOT entry. + +# jitlink-check: *{8}(got_addr(elf_reloc.o, external_data)) = external_data +# jitlink-check: decode_operand(test_gotpage_external, 1)[19:0] = \ +# jitlink-check: (got_addr(elf_reloc.o, external_data)[31:12] - \ +# jitlink-check: test_gotpage_external[31:12] + \ +# jitlink-check: got_addr(elf_reloc.o, external_data)[11:11])[19:0] +# jitlink-check: decode_operand(test_gotoffset12_external, 2)[11:0] = \ +# jitlink-check: got_addr(elf_reloc.o, external_data)[11:0] + .globl test_gotpage_external + .p2align 2 +test_gotpage_external: + pcalau12i $a0, %got_pc_hi20(external_data) + .size test_gotpage_external, .-test_gotpage_external + + .globl test_gotoffset12_external + .p2align 2 +test_gotoffset12_external: + ld.d $a0, $a0, %got_pc_lo12(external_data) + .size test_gotoffset12_external, .-test_gotoffset12_external + + + .globl named_data + .p2align 4 + .type named_data,@object +named_data: + .quad 0x2222222222222222 + .quad 0x3333333333333333 + .size named_data, .-named_data diff --git a/llvm/test/ExecutionEngine/JITLink/LoongArch/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/LoongArch/lit.local.cfg new file mode 
100644 index 0000000000000000000000000000000000000000..a4b6c6f59bdbda324c1a52024427fd796825c707 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s index caeae4fa441f7935923df49211c8cdfba3c9d614..3faf2b30b25369ce526dae9098c302df4eeac8ef 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s @@ -1,3 +1,4 @@ +# UNSUPPORTED: loongarch # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \ # RUN: -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s diff --git a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll index 067052c0714ae09cc840f0c4d1983f110bef698b..93ac6f8ccea5b92e4d4f53b69c14b0914ba48267 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll @@ -1,3 +1,6 @@ +; OHOS_LOCAL begin +; delete code +; OHOS_LOCAL end ; RUN: not lli -no-process-syms -emulated-tls -jit-kind=orc-lazy %s 2>&1 \ ; RUN: | FileCheck %s ; diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg index 4161b4f3ce4b699520d256f6b1b7b56421a52a65..3a3d23f2bb33572f9e4458f1be6a5a0ccbb5037f 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg @@ -1,6 +1,8 @@ import sys -if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', 'mips', 'mipsel', 'mips64', 'mips64el']: +if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', + 'mips', 'mipsel', 'mips64', 'mips64el', + 'loongarch64']: config.unsupported = True # FIXME: These tests don't pass 
with the COFF rtld. diff --git a/llvm/test/ExecutionEngine/frem.ll b/llvm/test/ExecutionEngine/frem.ll index 390e0f9563b854a2d9e0eef8317fb753733a9daf..52150f04c2b131bfca06ed34629ac598561561d0 100644 --- a/llvm/test/ExecutionEngine/frem.ll +++ b/llvm/test/ExecutionEngine/frem.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. +; UNSUPPORTED: loongarch + ; LLI.exe used to crash on Windows\X86 when certain single precession ; floating point intrinsics (defined as macros) are used. ; This unit test guards against the failure. diff --git a/llvm/test/ExecutionEngine/mov64zext32.ll b/llvm/test/ExecutionEngine/mov64zext32.ll index bba1a1987350ab4e39a134ee89e4916a4e5c1e46..f370ebeb528b14336d3125a8ca23b130ed2995d9 100644 --- a/llvm/test/ExecutionEngine/mov64zext32.ll +++ b/llvm/test/ExecutionEngine/mov64zext32.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. +; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll index 6f784265a73e1826451d50548cc458a47726fd76..03cdb24ab90ee1c16462860c98683d211fa58f6d 100644 --- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll +++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. +; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll index 6896af83c44fbf0d94a0e9dd49597204172bab2b..2775bbaf234ce85767527a3c543b04e657e2925e 100644 --- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll +++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. 
+; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll index f654120eaf8e0864dd60eefe63f9c36074cbf99a..d7742fe3219f815209a63bfb1fdb44d468a143e3 100644 --- a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll +++ b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. +; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll index 84bdec1cf5597fdeaa516c6aa9e1d3f3648bbe75..835a1f0deaf035f8df832a22bccc70f11d692098 100644 --- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll +++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. +; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll index 5a20fc4f11721cfb2f072ea47489ff06fc4ecfea..0a06b23f290712950098fc39a9d4bd3a5231f2d9 100644 --- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll +++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll @@ -1,3 +1,6 @@ +; LoongArch does not support mcjit. 
+; UNSUPPORTED: loongarch + ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s index a10845d7422af6f942f4c9e7939d16a2bda40fd2..5639ec886017ffc8854a700b019cbcb3668e6c62 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s index 1d6b489f33b04799b5412fd2724199bd2375b548..bd625dc5549f8bba905eb41323d8f6a35be80af4 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s index 
838b7e9330d77372d4063ac5648190e391428910..a310cb755fcdc29bce62fc8eb9e429b04b308029 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s index 3ddae6d0567f8a9c1a67b7148be5943fb02eec23..07f3b6276017f16d7a40cce61d3b7f2e9be4cde3 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s index fa5a5088e6a772fc1f64ddd6e14256b590e4bb6a..10dd822a4c922ef915e7f6c1bec8f7353a808cc6 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 
--mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s index a8f04cefe059bf1be0c52015b7e5e8b809299780..4cb7e6fe951c1d6eee860910843ddb07b2cb515b 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-move.s b/llvm/test/MC/LoongArch/Basic/Float/d-move.s index c3008add628497e040e01a5f93cbf8d5fe34dc46..c5d4b6a5fda97fa94a0bf1e799af27371a7636b4 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-move.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-move.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding --defsym=LA64=1 \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM,ASM-AND-OBJ64,ASM64 %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | 
llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj --defsym=LA64=1 \ -# RUN: | llvm-objdump -d --mattr=+d - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM-AND-OBJ64 %s ## Support for the 'D' extension implies support for 'F' diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s index a5873a54511ac71f1d44b847cc8351210bd4d874..5865d6b6e152f74a1e43c3661095310fa09cecb1 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: fadd.s $fs5, $ft7, $fs1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s index bfff92ff8a061d60e480af192f2179ef96099200..cdfb67b52af0c10db44f9d20c0a74f58bce43275 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # 
RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: fldgt.s $fa3, $s4, $t1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s index 583008b5a4f644de7afa00a498207eb79d3a4c7d..656808f60f4f6d1b689ef17ba009e2f8623c2e9b 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: bceqz $fcc6, 12 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s index cc4e1470d52515b1f1c03a426d514dd2fc5a9f50..8ba38426d3aa7ef7e22369567368d61615700549 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: fcmp.caf.s $fcc0, $fa0, $fa1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s index db44077dfc38f89e842f3fe021d04a15b1968547..069dab10c25a0f776f5d8c90d9dabff7f4355760 100644 --- 
a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: ffint.s.w $fs6, $fa5 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s index b5fbd9abd2baa298c67c348a7bf671a0bcaf9f03..a614e867e1d921a737a930be3be66c653cafc9cb 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: fld.s $ft15, $t3, 250 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-move.s b/llvm/test/MC/LoongArch/Basic/Float/f-move.s index da9107686d35f0f55a85a44e0d763c233a6d27bb..26702d60b68a05e7e755f0a29abda8b190d1ba37 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-move.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-move.s @@ -3,10 +3,10 @@ # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s # RUN: llvm-mc %s --triple=loongarch32 --mattr=+f 
--filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ +# RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s # ASM-AND-OBJ: fmov.s $ft5, $ft15 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s index 64274018081caf6f98a6993236ef3160aac53537..a35211db885141ac8281cc5506baa99060e8de14 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s @@ -29,6 +29,18 @@ sc.w $t7, $t2, 56 .ifdef LA64 +# CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] +amswap.w $a2, $t0, $s1, 0 + +# CHECK64-ASM-AND-OBJ: amswap.w $zero, $t0, $zero +# CHECK64-ASM: encoding: [0x00,0x30,0x60,0x38] +amswap.w $zero, $t0, $zero + +# CHECK64-ASM-AND-OBJ: amadd_db.w $zero, $zero, $a1 +# CHECK64-ASM: encoding: [0xa0,0x00,0x6a,0x38] +amadd_db.w $zero, $zero, $a1 + # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] amswap.w $a2, $t0, $s1 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s index 94b3976f5bfd09795ef490aa61bfcef4873fdb39..b226d8c9b7d24f2273873bc287760f5d7cdb73de 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s @@ -1,7 +1,9 @@ ## Test invalid instructions on both loongarch32 and loongarch64 target. 
-# RUN: not llvm-mc --triple=loongarch32 --mattr=-f %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK64 -# RUN: not llvm-mc --triple=loongarch64 --mattr=-f %s 2>&1 --defsym=LA64=1 | FileCheck %s +# RUN: not llvm-mc --triple=loongarch32 --mattr=-f %s 2>&1 \ +# RUN: | FileCheck %s --check-prefixes=CHECK,CHECK64 +# RUN: not llvm-mc --triple=loongarch64 --mattr=-f %s 2>&1 --defsym=LA64=1 \ +# RUN: | FileCheck %s ## Out of range immediates ## uimm2 @@ -37,36 +39,40 @@ preld 32, $a0, 0 ## uimm12 andi $a0, $a0, -1 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] -ori $a0, $a0, 4096 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [0, 4095] xori $a0, $a0, 4096 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] +## uimm12_ori +ori $a0, $a0, 4096 +# CHECK: :[[#@LINE-1]]:15: error: operand must be a symbol with modifier (e.g. %abs_lo12) or an integer in the range [0, 4095] + ## simm12 -addi.w $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] slti $a0, $a0, -2049 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] sltui $a0, $a0, 2048 # CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] preld 0, $a0, 2048 # CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [-2048, 2047] + +## simm12_addlike +addi.w $a0, $a0, -2049 +# CHECK: :[[#@LINE-1]]:18: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] ld.b $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%pc_lo12) or an integer in the range [-2048, 2047] ld.h $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] ld.w $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] ld.bu $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:17: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] ld.hu $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:17: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] st.b $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] st.h $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] st.w $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%pc_lo12) or an integer in the range [-2048, 2047] ## simm14_lsl2 ll.w $a0, $a0, -32772 @@ -80,49 +86,53 @@ sc.w $a0, $a0, 32768 ## simm16_lsl2 beq $a0, $a0, -0x20004 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:15: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] bne $a0, $a0, -0x20004 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:15: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] blt $a0, $a0, -0x1FFFF -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:15: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] bge $a0, $a0, -0x1FFFF -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:15: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] bltu $a0, $a0, 0x1FFFF -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] bgeu $a0, $a0, 0x1FFFF -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068] jirl $a0, $a0, 0x20000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%b16) or an integer in the range [-131072, 131068] ## simm20 -lu12i.w $a0, -0x80001 -# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] pcaddi $a0, -0x80001 # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-524288, 524287] pcaddu12i $a0, 0x80000 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] + +## simm20_lu12iw +lu12i.w $a0, -0x80001 +# CHECK: :[[#@LINE-1]]:14: error: operand must be a symbol with modifier (e.g. %abs_hi20) or an integer in the range [-524288, 524287] + +## simm20_pcalau12i pcalau12i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_hi20) or an integer in the range [-524288, 524287] ## simm21_lsl2 beqz $a0, -0x400001 -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +# CHECK: :[[#@LINE-1]]:11: error: operand must be a symbol with modifier (e.g. %b21) or an integer in the range [-4194304, 4194300] bnez $a0, -0x3FFFFF -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +# CHECK: :[[#@LINE-1]]:11: error: operand must be a symbol with modifier (e.g. %b21) or an integer in the range [-4194304, 4194300] beqz $a0, 0x3FFFFF -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +# CHECK: :[[#@LINE-1]]:11: error: operand must be a symbol with modifier (e.g. %b21) or an integer in the range [-4194304, 4194300] bnez $a0, 0x400000 -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] +# CHECK: :[[#@LINE-1]]:11: error: operand must be a symbol with modifier (e.g. 
%b21) or an integer in the range [-4194304, 4194300] ## simm26_lsl2 b -0x8000001 -# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +# CHECK: :[[#@LINE-1]]:3: error: operand must be a bare symbol name or an immediate must be a multiple of 4 in the range [-134217728, 134217724] b 0x1 -# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +# CHECK: :[[#@LINE-1]]:3: error: operand must be a bare symbol name or an immediate must be a multiple of 4 in the range [-134217728, 134217724] bl 0x7FFFFFF -# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +# CHECK: :[[#@LINE-1]]:4: error: operand must be a bare symbol name or an immediate must be a multiple of 4 in the range [-134217728, 134217724] bl 0x8000000 -# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] +# CHECK: :[[#@LINE-1]]:4: error: operand must be a bare symbol name or an immediate must be a multiple of 4 in the range [-134217728, 134217724] ## Invalid mnemonics nori $a0, $a0, 0 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s index a8b175a886cc3b322d095a95f6caed75f5f4a6fa..acddca9432a698aa30f7bff95dd60b7417edd72f 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s @@ -29,17 +29,19 @@ bstrins.d $a0, $a0, 63, -1 bstrpick.d $a0, $a0, 64, 0 # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] -## simm12 +## simm12_addlike addi.d $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] -lu52i.d $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:18: error: operand must be a symbol with modifier (e.g. 
%pc_lo12) or an integer in the range [-2048, 2047] ld.wu $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:17: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] ld.d $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] st.d $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %pc_lo12) or an integer in the range [-2048, 2047] + +## simm12_lu52id +lu52i.d $a0, $a0, 2048 +# CHECK-LA64: :[[#@LINE-1]]:19: error: operand must be a symbol with modifier (e.g. %pc64_hi12) or an integer in the range [-2048, 2047] ## simm14_lsl2 ldptr.w $a0, $a0, -32772 @@ -62,11 +64,13 @@ addu16i.d $a0, $a0, 32768 # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-32768, 32767] ## simm20 -lu32i.d $a0, -0x80001 -# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] pcaddu18i $a0, 0x80000 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +## simm20_lu32id +lu32i.d $a0, 0x80000 +# CHECK-LA64: :[[#@LINE-1]]:14: error: operand must be a symbol with modifier (e.g. 
%abs64_lo20) or an integer in the range [-524288, 524287] + ## msbd < lsbd # CHECK: :[[#@LINE+1]]:21: error: msb is less than lsb bstrins.d $a0, $a0, 1, 2 @@ -75,3 +79,15 @@ bstrins.d $a0, $a0, 1, 2 # CHECK: :[[#@LINE+1]]:22: error: msb is less than lsb bstrpick.d $a0, $a0, 32, 63 # CHECK: ^~~~~~ + +# CHECK: :[[#@LINE+1]]:10: error: $rd must be different from both $rk and $rj +amadd.d $a0, $a0, $a0 +# CHECK: :[[#@LINE+1]]:10: error: $rd must be different from both $rk and $rj +ammin.w $a0, $a0, $a1 +# CHECK: :[[#@LINE+1]]:10: error: $rd must be different from both $rk and $rj +amxor.w $a0, $a1, $a0 + +# CHECK: :[[#@LINE+1]]:24: error: expected optional integer offset +amadd.d $a0, $a1, $a2, $a3 +# CHECK: :[[#@LINE+1]]:24: error: optional integer offset must be 0 +amadd.d $a0, $a1, $a2, 1 diff --git a/llvm/test/MC/LoongArch/Macros/aliases-br.s b/llvm/test/MC/LoongArch/Macros/aliases-br.s new file mode 100644 index 0000000000000000000000000000000000000000..e8d85bdec763b731cfff6b72a1b0dc75c9ff6c06 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/aliases-br.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s + +bgt $a1, $a0, 16 +# CHECK: blt $a0, $a1, 16 +bgtu $a1, $a0, 16 +# CHECK-NEXT: bltu $a0, $a1, 16 +ble $a1, $a0, 16 +# CHECK-NEXT: bge $a0, $a1, 16 +bleu $a1, $a0, 16 +# CHECK-NEXT: bgeu $a0, $a1, 16 +bltz $a0, 16 +# CHECK-NEXT: blt $a0, $zero, 16 +bgtz $a0, 16 +# CHECK-NEXT: blt $zero, $a0, 16 +blez $a0, 16 +# CHECK-NEXT: bge $zero, $a0, 16 +bgez $a0, 16 +# CHECK-NEXT: bge $a0, $zero, 16 diff --git a/llvm/test/MC/LoongArch/Macros/aliases-la-bad.s b/llvm/test/MC/LoongArch/Macros/aliases-la-bad.s new file mode 100644 index 0000000000000000000000000000000000000000..d371ec23935f4769a6d0cc8b7e964932cba17061 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/aliases-la-bad.s @@ -0,0 +1,10 @@ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +la $a0, $a1, sym +# CHECK: :[[#@LINE-1]]:10: error: operand must be a bare symbol name + 
+la $a0, 1 +# CHECK: :[[#@LINE-1]]:9: error: operand must be a bare symbol name + +la.global $a0, $a1, 1 +# CHECK: :[[#@LINE-1]]:21: error: operand must be a bare symbol name diff --git a/llvm/test/MC/LoongArch/Macros/aliases-la.s b/llvm/test/MC/LoongArch/Macros/aliases-la.s new file mode 100644 index 0000000000000000000000000000000000000000..dd5a4d474e001e76288f53ebc1bdca38e8ee6060 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/aliases-la.s @@ -0,0 +1,74 @@ +## Test la/la.global/la.local expand to different instructions sequence under +## different features. + +# RUN: llvm-mc --triple=loongarch64 %s \ +# RUN: | FileCheck %s --check-prefix=NORMAL +# RUN: llvm-mc --triple=loongarch64 --mattr=+la-global-with-pcrel < %s \ +# RUN: | FileCheck %s --check-prefix=GTOPCR +# RUN: llvm-mc --triple=loongarch64 --mattr=+la-global-with-abs < %s \ +# RUN: | FileCheck %s --check-prefix=GTOABS +# RUN: llvm-mc --triple=loongarch64 --mattr=+la-local-with-abs < %s \ +# RUN: | FileCheck %s --check-prefix=LTOABS + +la $a0, sym +# NORMAL: pcalau12i $a0, %got_pc_hi20(sym) +# NORMAL-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym) + +# GTOPCR: pcalau12i $a0, %pc_hi20(sym) +# GTOPCR-NEXT: addi.d $a0, $a0, %pc_lo12(sym) + +# GTOABS: lu12i.w $a0, %abs_hi20(sym) +# GTOABS-NEXT: ori $a0, $a0, %abs_lo12(sym) +# GTOABS-NEXT: lu32i.d $a0, %abs64_lo20(sym) +# GTOABS-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym) + +la.global $a0, sym_global +# NORMAL: pcalau12i $a0, %got_pc_hi20(sym_global) +# NORMAL-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym_global) + +# GTOPCR: pcalau12i $a0, %pc_hi20(sym_global) +# GTOPCR-NEXT: addi.d $a0, $a0, %pc_lo12(sym_global) + +# GTOABS: lu12i.w $a0, %abs_hi20(sym_global) +# GTOABS-NEXT: ori $a0, $a0, %abs_lo12(sym_global) +# GTOABS-NEXT: lu32i.d $a0, %abs64_lo20(sym_global) +# GTOABS-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_global) + +la.global $a0, $a1, sym_global_large +# NORMAL: pcalau12i $a0, %got_pc_hi20(sym_global_large) +# NORMAL-NEXT: addi.d $a1, $zero, 
%got_pc_lo12(sym_global_large) +# NORMAL-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_global_large) +# NORMAL-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_global_large) +# NORMAL-NEXT: ldx.d $a0, $a0, $a1 + +# GTOPCR: pcalau12i $a0, %pc_hi20(sym_global_large) +# GTOPCR-NEXT: addi.d $a1, $zero, %pc_lo12(sym_global_large) +# GTOPCR-NEXT: lu32i.d $a1, %pc64_lo20(sym_global_large) +# GTOPCR-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_global_large) +# GTOPCR-NEXT: add.d $a0, $a0, $a1 + +# GTOABS: lu12i.w $a0, %abs_hi20(sym_global_large) +# GTOABS-NEXT: ori $a0, $a0, %abs_lo12(sym_global_large) +# GTOABS-NEXT: lu32i.d $a0, %abs64_lo20(sym_global_large) +# GTOABS-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_global_large) + +la.local $a0, sym_local +# NORMAL: pcalau12i $a0, %pc_hi20(sym_local) +# NORMAL-NEXT: addi.d $a0, $a0, %pc_lo12(sym_local) + +# LTOABS: lu12i.w $a0, %abs_hi20(sym_local) +# LTOABS-NEXT: ori $a0, $a0, %abs_lo12(sym_local) +# LTOABS-NEXT: lu32i.d $a0, %abs64_lo20(sym_local) +# LTOABS-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_local) + +la.local $a0, $a1, sym_local_large +# NORMAL: pcalau12i $a0, %pc_hi20(sym_local_large) +# NORMAL-NEXT: addi.d $a1, $zero, %pc_lo12(sym_local_large) +# NORMAL-NEXT: lu32i.d $a1, %pc64_lo20(sym_local_large) +# NORMAL-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_local_large) +# NORMAL-NEXT: add.d $a0, $a0, $a1 + +# LTOABS: lu12i.w $a0, %abs_hi20(sym_local_large) +# LTOABS-NEXT: ori $a0, $a0, %abs_lo12(sym_local_large) +# LTOABS-NEXT: lu32i.d $a0, %abs64_lo20(sym_local_large) +# LTOABS-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_local_large) diff --git a/llvm/test/MC/LoongArch/Macros/macros-la-bad.s b/llvm/test/MC/LoongArch/Macros/macros-la-bad.s new file mode 100644 index 0000000000000000000000000000000000000000..03c6355e40b0928e18c16a5215e24fd8f1ce1b97 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-la-bad.s @@ -0,0 +1,13 @@ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +la.got $a0, 1 +# CHECK: :[[#@LINE-1]]:13: error: operand 
must be a bare symbol name + +la.pcrel $a0, $a1, 1 +# CHECK: :[[#@LINE-1]]:20: error: operand must be a bare symbol name + +la.abs $a0, $a1, sym +# CHECK: :[[#@LINE-1]]:14: error: operand must be a bare symbol name + +la.pcrel $a0, $a0, sym +# CHECK: :[[#@LINE-1]]:11: error: $rd must be different from $rj diff --git a/llvm/test/MC/LoongArch/Macros/macros-la.s b/llvm/test/MC/LoongArch/Macros/macros-la.s new file mode 100644 index 0000000000000000000000000000000000000000..924e4326b8e5d66f8bd225548f0db4f6743c9c25 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-la.s @@ -0,0 +1,66 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s + +la.abs $a0, sym_abs +# CHECK: lu12i.w $a0, %abs_hi20(sym_abs) +# CHECK-NEXT: ori $a0, $a0, %abs_lo12(sym_abs) +# CHECK-NEXT: lu32i.d $a0, %abs64_lo20(sym_abs) +# CHECK-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_abs) + +la.pcrel $a0, sym_pcrel +# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel) +# CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(sym_pcrel) + +la.pcrel $a0, $a1, sym_pcrel_large +# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel_large) +# CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(sym_pcrel_large) +# CHECK-NEXT: lu32i.d $a1, %pc64_lo20(sym_pcrel_large) +# CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_pcrel_large) +# CHECK-NEXT: add.d $a0, $a0, $a1 + +la.got $a0, sym_got +# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got) +# CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym_got) + +la.got $a0, $a1, sym_got_large +# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got_large) +# CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_got_large) +# CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_got_large) +# CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_got_large) +# CHECK-NEXT: ldx.d $a0, $a0, $a1 + +la.tls.le $a0, sym_le +# CHECK: lu12i.w $a0, %le_hi20(sym_le) +# CHECK-NEXT: ori $a0, $a0, %le_lo12(sym_le) + +la.tls.ie $a0, sym_ie +# CHECK: pcalau12i $a0, %ie_pc_hi20(sym_ie) +# CHECK-NEXT: ld.d $a0, $a0, %ie_pc_lo12(sym_ie) + +la.tls.ie $a0, $a1, sym_ie_large +# CHECK: 
pcalau12i $a0, %ie_pc_hi20(sym_ie_large) +# CHECK-NEXT: addi.d $a1, $zero, %ie_pc_lo12(sym_ie_large) +# CHECK-NEXT: lu32i.d $a1, %ie64_pc_lo20(sym_ie_large) +# CHECK-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large) +# CHECK-NEXT: ldx.d $a0, $a0, $a1 + +la.tls.ld $a0, sym_ld +# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld) +# CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_ld) + +la.tls.ld $a0, $a1, sym_ld_large +# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld_large) +# CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_ld_large) +# CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_ld_large) +# CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_ld_large) +# CHECK-NEXT: add.d $a0, $a0, $a1 + +la.tls.gd $a0, sym_gd +# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd) +# CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_gd) + +la.tls.gd $a0, $a1, sym_gd_large +# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd_large) +# CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_gd_large) +# CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_gd_large) +# CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_gd_large) +# CHECK-NEXT: add.d $a0, $a0, $a1 diff --git a/llvm/test/MC/LoongArch/Macros/macros-li-bad.s b/llvm/test/MC/LoongArch/Macros/macros-li-bad.s new file mode 100644 index 0000000000000000000000000000000000000000..194b86bfed2733557a536d6623a4e7421681afd1 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-li-bad.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +li.w $a0, 0x100000000 +# CHECK: :[[#@LINE-1]]:11: error: operand must be a 32 bit immediate + +li.d $a0, 0x10000000000000000 +# CHECK: :[[#@LINE-1]]:11: error: unknown operand diff --git a/llvm/test/MC/LoongArch/Macros/macros-li.s b/llvm/test/MC/LoongArch/Macros/macros-li.s new file mode 100644 index 0000000000000000000000000000000000000000..994aa439effa1bad7728e75cb2e301e63e9ac264 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-li.s @@ -0,0 +1,90 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s + +li.w 
$a0, 0x0 +# CHECK: ori $a0, $zero, 0 +li.d $a0, 0x0 +# CHECK-NEXT: ori $a0, $zero, 0 + +li.w $a0, 0xfff +# CHECK: ori $a0, $zero, 4095 +li.d $a0, 0xfff +# CHECK-NEXT: ori $a0, $zero, 4095 + +li.w $a0, 0x7ffff000 +# CHECK: lu12i.w $a0, 524287 +li.d $a0, 0x7ffff000 +# CHECK-NEXT: lu12i.w $a0, 524287 + +li.w $a0, 0x80000000 +# CHECK: lu12i.w $a0, -524288 +li.d $a0, 0x80000000 +# CHECK-NEXT: lu12i.w $a0, -524288 +# CHECK-NEXT: lu32i.d $a0, 0 + +li.w $a0, 0xfffff800 +# CHECK: addi.w $a0, $zero, -2048 +li.d $a0, 0xfffff800 +# CHECK-NEXT: addi.w $a0, $zero, -2048 +# CHECK-NEXT: lu32i.d $a0, 0 + +li.w $a0, 0xfffffffffffff800 +# CHECK: addi.w $a0, $zero, -2048 +li.d $a0, 0xfffffffffffff800 +# CHECK-NEXT: addi.w $a0, $zero, -2048 + +li.w $a0, 0xffffffff80000800 +# CHECK: lu12i.w $a0, -524288 +# CHECK-NEXT: ori $a0, $a0, 2048 +li.d $a0, 0xffffffff80000800 +# CHECK-NEXT: lu12i.w $a0, -524288 +# CHECK-NEXT: ori $a0, $a0, 2048 + +li.d $a0, 0x7ffff00000800 +# CHECK: ori $a0, $zero, 2048 +# CHECK-NEXT: lu32i.d $a0, 524287 + +li.d $a0, 0x8000000000fff +# CHECK: ori $a0, $zero, 4095 +# CHECK-NEXT: lu32i.d $a0, -524288 +# CHECK-NEXT: lu52i.d $a0, $a0, 0 + +li.d $a0, 0x8000080000800 +# CHECK: lu12i.w $a0, -524288 +# CHECK-NEXT: ori $a0, $a0, 2048 +# CHECK-NEXT: lu32i.d $a0, -524288 +# CHECK-NEXT: lu52i.d $a0, $a0, 0 + +li.d $a0, 0x80000fffff800 +# CHECK: addi.w $a0, $zero, -2048 +# CHECK-NEXT: lu32i.d $a0, -524288 +# CHECK-NEXT: lu52i.d $a0, $a0, 0 + +li.d $a0, 0xffffffffff000 +# CHECK: lu12i.w $a0, -1 +# CHECK-NEXT: lu52i.d $a0, $a0, 0 + +li.d $a0, 0xffffffffff800 +# CHECK: addi.w $a0, $zero, -2048 +# CHECK-NEXT: lu52i.d $a0, $a0, 0 + +li.d $a0, 0x7ff0000000000000 +# CHECK: lu52i.d $a0, $zero, 2047 + +li.d $a0, 0x7ff0000080000000 +# CHECK: lu12i.w $a0, -524288 +# CHECK-NEXT: lu32i.d $a0, 0 +# CHECK-NEXT: lu52i.d $a0, $a0, 2047 + +li.d $a0, 0x7fffffff800007ff +# CHECK: lu12i.w $a0, -524288 +# CHECK-NEXT: ori $a0, $a0, 2047 +# CHECK-NEXT: lu52i.d $a0, $a0, 2047 + +li.d $a0, 
0xfff0000000000fff +# CHECK: ori $a0, $zero, 4095 +# CHECK-NEXT: lu52i.d $a0, $a0, -1 + +li.d $a0, 0xffffffff7ffff800 +# CHECK: lu12i.w $a0, 524287 +# CHECK-NEXT: ori $a0, $a0, 2048 +# CHECK-NEXT: lu32i.d $a0, -1 diff --git a/llvm/test/MC/LoongArch/Misc/numeric-reg-names.s b/llvm/test/MC/LoongArch/Misc/numeric-reg-names.s new file mode 100644 index 0000000000000000000000000000000000000000..b724e47425203a21fd94e1879637b9cc126dcecf --- /dev/null +++ b/llvm/test/MC/LoongArch/Misc/numeric-reg-names.s @@ -0,0 +1,64 @@ +# RUN: llvm-mc --triple=loongarch32 --mattr=+f --loongarch-numeric-reg %s \ +# RUN: | FileCheck %s +# RUN: llvm-mc --triple=loongarch32 --mattr=+f -M numeric %s \ +# RUN: | FileCheck %s +# RUN: llvm-mc --triple=loongarch32 --mattr=+f --filetype=obj %s -o %t.32 +# RUN: llvm-objdump -d -M numeric %t.32 | FileCheck %s +# RUN: llvm-mc --triple=loongarch64 --mattr=+f --loongarch-numeric-reg %s \ +# RUN: | FileCheck %s +# RUN: llvm-mc --triple=loongarch64 --mattr=+f -M numeric %s \ +# RUN: | FileCheck %s +# RUN: llvm-mc --triple=loongarch64 --mattr=+f --filetype=obj %s -o %t.64 +# RUN: llvm-objdump -d -M numeric %t.64 | FileCheck %s + +addi.w $zero, $ra, 1 +addi.w $tp, $sp, 1 +addi.w $a0, $a1, 1 +addi.w $a2, $a3, 1 +addi.w $a4, $a5, 1 +addi.w $a6, $a7, 1 +addi.w $t0, $t1, 1 +addi.w $t2, $t3, 1 +addi.w $t4, $t5, 1 +addi.w $t6, $t7, 1 +addi.w $t8, $r21, 1 +addi.w $fp, $s0, 1 +addi.w $s1, $s2, 1 +addi.w $s3, $s4, 1 +addi.w $s5, $s6, 1 +addi.w $s7, $s8, 1 + +# CHECK: addi.w $r0, $r1, 1 +# CHECK-NEXT: addi.w $r2, $r3, 1 +# CHECK-NEXT: addi.w $r4, $r5, 1 +# CHECK-NEXT: addi.w $r6, $r7, 1 +# CHECK-NEXT: addi.w $r8, $r9, 1 +# CHECK-NEXT: addi.w $r10, $r11, 1 +# CHECK-NEXT: addi.w $r12, $r13, 1 +# CHECK-NEXT: addi.w $r14, $r15, 1 +# CHECK-NEXT: addi.w $r16, $r17, 1 +# CHECK-NEXT: addi.w $r18, $r19, 1 +# CHECK-NEXT: addi.w $r20, $r21, 1 +# CHECK-NEXT: addi.w $r22, $r23, 1 +# CHECK-NEXT: addi.w $r24, $r25, 1 +# CHECK-NEXT: addi.w $r26, $r27, 1 +# CHECK-NEXT: addi.w $r28, 
$r29, 1 +# CHECK-NEXT: addi.w $r30, $r31, 1 + +fmadd.s $fa0, $fa1, $fa2, $fa3 +fmadd.s $fa4, $fa5, $fa6, $fa7 +fmadd.s $ft0, $ft1, $ft2, $ft3 +fmadd.s $ft4, $ft5, $ft6, $ft7 +fmadd.s $ft8, $ft9, $ft10, $ft11 +fmadd.s $ft12, $ft13, $ft14, $ft15 +fmadd.s $fs0, $fs1, $fs2, $fs3 +fmadd.s $fs4, $fs5, $fs6, $fs7 + +# CHECK: fmadd.s $f0, $f1, $f2, $f3 +# CHECK-NEXT: fmadd.s $f4, $f5, $f6, $f7 +# CHECK-NEXT: fmadd.s $f8, $f9, $f10, $f11 +# CHECK-NEXT: fmadd.s $f12, $f13, $f14, $f15 +# CHECK-NEXT: fmadd.s $f16, $f17, $f18, $f19 +# CHECK-NEXT: fmadd.s $f20, $f21, $f22, $f23 +# CHECK-NEXT: fmadd.s $f24, $f25, $f26, $f27 +# CHECK-NEXT: fmadd.s $f28, $f29, $f30, $f31 diff --git a/llvm/test/MC/LoongArch/Misc/tls-symbols.s b/llvm/test/MC/LoongArch/Misc/tls-symbols.s new file mode 100644 index 0000000000000000000000000000000000000000..2f91cbe004d27d318e350208dd61b43130f316c5 --- /dev/null +++ b/llvm/test/MC/LoongArch/Misc/tls-symbols.s @@ -0,0 +1,79 @@ +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s -o %t +# RUN: llvm-readobj -s %t | FileCheck %s + +lu12i.w $a1, %gd_hi20(gd_abs) +# CHECK: Symbol { +# CHECK: Name: gd_abs +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +pcalau12i $a1, %gd_pc_hi20(gd_pcrel) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: gd_pcrel +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +lu12i.w $a1, %ld_hi20(ld_abs) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: ld_abs +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +pcalau12i $a1, %ld_pc_hi20(ld_pcrel) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: ld_pcrel +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# 
CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +lu12i.w $a1, %ie_hi20(ie_abs) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: ie_abs +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +pcalau12i $a1, %ie_pc_hi20(ie_pcrel) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: ie_pcrel +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } + +lu12i.w $a1, %le_hi20(le) +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: le +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Binding: Global +# CHECK-NEXT: Type: TLS +# CHECK-NEXT: Other: 0 +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: } diff --git a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s b/llvm/test/MC/LoongArch/Misc/unaligned-nops.s index 5952540b46d09ee5af7593677353718626f56bbf..5a515dc9e9ce82cfaa701c5086e413c9dba80f2b 100644 --- a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s +++ b/llvm/test/MC/LoongArch/Misc/unaligned-nops.s @@ -1,5 +1,8 @@ -# RUN: not --crash llvm-mc --filetype=obj --triple=loongarch64 %s -o %t +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK: 01 00 00 00 +# CHECK-NEXT: 00 00 40 03 nop .byte 1 -# CHECK: LLVM ERROR: unable to write nop sequence of 3 bytes -.p2align 2 +.p2align 3 foo: diff --git a/llvm/test/MC/LoongArch/Relocations/fde-reloc.s b/llvm/test/MC/LoongArch/Relocations/fde-reloc.s new file mode 100644 index 0000000000000000000000000000000000000000..990e07c7f00bd07e47ef6befb520ec249482ce29 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/fde-reloc.s @@ -0,0 +1,14 @@ +# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ +# RUN: | llvm-readobj -r - | FileCheck %s + +## Ensure that the 
eh_frame records the symbolic difference with +## the R_LARCH_32_PCREL relocation. + +func: + .cfi_startproc + ret + .cfi_endproc + +# CHECK: Section (4) .rela.eh_frame { +# CHECK-NEXT: 0x1C R_LARCH_32_PCREL .text 0x0 +# CHECK-NEXT: } diff --git a/llvm/test/MC/LoongArch/Relocations/fixups-diagnostics.s b/llvm/test/MC/LoongArch/Relocations/fixups-diagnostics.s new file mode 100644 index 0000000000000000000000000000000000000000..c72eef7cd991681c3580965c9c4fc5220569dc38 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/fixups-diagnostics.s @@ -0,0 +1,20 @@ +# RUN: not llvm-mc --triple=loongarch64 --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s + + beq $a0, $a1, unaligned # CHECK: :[[#@LINE]]:3: error: fixup value must be 4-byte aligned + beqz $a0, unaligned # CHECK: :[[#@LINE]]:3: error: fixup value must be 4-byte aligned + b unaligned # CHECK: :[[#@LINE]]:3: error: fixup value must be 4-byte aligned + .byte 0 +unaligned: + .byte 0 + .byte 0 + .byte 0 + + beq $a0, $a1, out_of_range_b18 # CHECK: :[[#@LINE]]:3: error: fixup value out of range [-131072, 131071] + .space 1<<18 +out_of_range_b18: + beqz $a0, out_of_range_b23 # CHECK: :[[#@LINE]]:3: error: fixup value out of range [-4194304, 4194303] + .space 1<<23 +out_of_range_b23: + b out_of_range_b28 # CHECK: :[[#@LINE]]:3: error: fixup value out of range [-134217728, 134217727] + .space 1<<28 +out_of_range_b28: diff --git a/llvm/test/MC/LoongArch/Relocations/fixups-invalid.s b/llvm/test/MC/LoongArch/Relocations/fixups-invalid.s new file mode 100644 index 0000000000000000000000000000000000000000..8251a71d74d71c4837fb9ef89e5c63393c9c452b --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/fixups-invalid.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc --filetype=obj %s --triple=loongarch32 -o /dev/null 2>&1 \ +# RUN: | FileCheck %s +# RUN: not llvm-mc --filetype=obj %s --triple=loongarch64 -o /dev/null 2>&1 \ +# RUN: | FileCheck %s + +.byte foo # CHECK: [[#@LINE]]:7: error: 1-byte data relocations not supported +.2byte 
foo # CHECK: [[#@LINE]]:8: error: 2-byte data relocations not supported diff --git a/llvm/test/MC/LoongArch/Relocations/fixups.s b/llvm/test/MC/LoongArch/Relocations/fixups.s new file mode 100644 index 0000000000000000000000000000000000000000..cdb38c09d4c0c4ef0566a7d021d7c1188e9894f6 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/fixups.s @@ -0,0 +1,67 @@ +# RUN: llvm-mc --triple=loongarch64 %s --show-encoding \ +# RUN: | FileCheck --check-prefix=CHECK-FIXUP %s +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-objdump -d - | FileCheck --check-prefix=CHECK-INSTR %s +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-readobj -r - | FileCheck --check-prefix=CHECK-REL %s + +## Checks that fixups that can be resolved within the same object file are +## applied correctly. + +.LBB0: +lu12i.w $t1, %abs_hi20(val) +# CHECK-FIXUP: fixup A - offset: 0, value: %abs_hi20(val), kind: fixup_loongarch_abs_hi20 +# CHECK-INSTR: lu12i.w $t1, 74565 + +ori $t1, $t1, %abs_lo12(val) +# CHECK-FIXUP: fixup A - offset: 0, value: %abs_lo12(val), kind: fixup_loongarch_abs_lo12 +# CHECK-INSTR: ori $t1, $t1, 1656 + +b .LBB0 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_loongarch_b26 +# CHECK-INSTR: b -8 +b .LBB2 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB2, kind: fixup_loongarch_b26 +# CHECK-INSTR: b 331004 +beq $a0, $a1, .LBB0 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_loongarch_b16 +# CHECK-INSTR: beq $a0, $a1, -16 +blt $a0, $a1, .LBB1 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB1, kind: fixup_loongarch_b16 +# CHECK-INSTR: blt $a0, $a1, 1116 +beqz $a0, .LBB0 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_loongarch_b21 +# CHECK-INSTR: beqz $a0, -24 +bnez $a0, .LBB1 +# CHECK-FIXUP: fixup A - offset: 0, value: .LBB1, kind: fixup_loongarch_b21 +# CHECK-INSTR: bnez $a0, 1108 + +.fill 1104 + +.LBB1: + +.fill 329876 +nop +.LBB2: + +.set val, 0x12345678 + +# CHECK-REL-NOT: R_LARCH + +## 
Testing the function call offset could resolved by assembler +## when the function and the callsite within the same compile unit. +func: +.fill 100 +bl func +# CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_loongarch_b26 +# CHECK-INSTR: bl -100 + +.fill 10000 +bl func +# CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_loongarch_b26 +# CHECK-INSTR: bl -10104 + +.fill 20888 +bl func +# CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_loongarch_b26 +# CHECK-INSTR: bl -30996 diff --git a/llvm/test/MC/LoongArch/Relocations/reloc-directive-err.s b/llvm/test/MC/LoongArch/Relocations/reloc-directive-err.s new file mode 100644 index 0000000000000000000000000000000000000000..60fd145564ae56df85ea19961ea74e7b096cee93 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/reloc-directive-err.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s +# RUN: not llvm-mc --filetype=obj --triple=loongarch64 %s -o /dev/null 2>&1 \ +# RUN: | FileCheck %s + +# PRINT: .reloc 0, R_INVALID, 0 +# CHECK: {{.*}}.s:[[# @LINE+1]]:11: error: unknown relocation name +.reloc 0, R_INVALID, 0 diff --git a/llvm/test/MC/LoongArch/Relocations/reloc-directive.s b/llvm/test/MC/LoongArch/Relocations/reloc-directive.s new file mode 100644 index 0000000000000000000000000000000000000000..f900f17c06c3919d6a90d2b7035a6f3125bda689 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/reloc-directive.s @@ -0,0 +1,46 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-readobj -r - | FileCheck %s + +# PRINT: .reloc 8, R_LARCH_NONE, .data +# PRINT: .reloc 4, R_LARCH_NONE, foo+4 +# PRINT: .reloc 0, R_LARCH_NONE, 8 +# PRINT: .reloc 0, R_LARCH_32, .data+2 +# PRINT: .reloc 0, R_LARCH_TLS_DTPMOD32, foo+3 +# PRINT: .reloc 0, R_LARCH_IRELATIVE, 5 +# PRINT: .reloc 0, BFD_RELOC_NONE, 9 +# PRINT-NEXT: .reloc 0, BFD_RELOC_32, 9 +# PRINT-NEXT: .reloc 0, 
BFD_RELOC_64, 9 + +.text + ret + nop + nop + .reloc 8, R_LARCH_NONE, .data + .reloc 4, R_LARCH_NONE, foo+4 + .reloc 0, R_LARCH_NONE, 8 + + .reloc 0, R_LARCH_32, .data+2 + .reloc 0, R_LARCH_TLS_DTPMOD32, foo+3 + .reloc 0, R_LARCH_IRELATIVE, 5 + + .reloc 0, BFD_RELOC_NONE, 9 + .reloc 0, BFD_RELOC_32, 9 + .reloc 0, BFD_RELOC_64, 9 + +.data +.globl foo +foo: + .word 0 + .word 0 + .word 0 + +# CHECK: 0x8 R_LARCH_NONE .data 0x0 +# CHECK-NEXT: 0x4 R_LARCH_NONE foo 0x4 +# CHECK-NEXT: 0x0 R_LARCH_NONE - 0x8 +# CHECK-NEXT: 0x0 R_LARCH_32 .data 0x2 +# CHECK-NEXT: 0x0 R_LARCH_TLS_DTPMOD32 foo 0x3 +# CHECK-NEXT: 0x0 R_LARCH_IRELATIVE - 0x5 +# CHECK-NEXT: 0x0 R_LARCH_NONE - 0x9 +# CHECK-NEXT: 0x0 R_LARCH_32 - 0x9 +# CHECK-NEXT: 0x0 R_LARCH_64 - 0x9 diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s new file mode 100644 index 0000000000000000000000000000000000000000..042cc93470a1e5b1b72b55814e0d78529bc0bbe1 --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/relocations.s @@ -0,0 +1,220 @@ +# RUN: llvm-mc --triple=loongarch64 < %s --show-encoding \ +# RUN: | FileCheck --check-prefixes=INSTR,FIXUP %s +# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ +# RUN: | llvm-readobj -r - | FileCheck --check-prefix=RELOC %s + +## Check prefixes: +## RELOC - Check the relocation in the object. +## FIXUP - Check the fixup on the instruction. +## INSTR - Check the instruction is handled properly by the ASMPrinter. 
+ +.long foo +# RELOC: R_LARCH_32 foo + +.quad foo +# RELOC: R_LARCH_64 foo + +bne $t1, $t2, %b16(foo) +# RELOC: R_LARCH_B16 +# INSTR: bne $t1, $t2, %b16(foo) +# FIXUP: fixup A - offset: 0, value: %b16(foo), kind: fixup_loongarch_b16 + +bnez $t1, %b21(foo) +# RELOC: R_LARCH_B21 +# INSTR: bnez $t1, %b21(foo) +# FIXUP: fixup A - offset: 0, value: %b21(foo), kind: fixup_loongarch_b21 + +bl %plt(foo) +# RELOC: R_LARCH_B26 +# INSTR: bl %plt(foo) +# FIXUP: fixup A - offset: 0, value: %plt(foo), kind: fixup_loongarch_b26 + +bl foo +# RELOC: R_LARCH_B26 +# INSTR: bl foo +# FIXUP: fixup A - offset: 0, value: foo, kind: fixup_loongarch_b26 + +lu12i.w $t1, %abs_hi20(foo) +# RELOC: R_LARCH_ABS_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %abs_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %abs_hi20(foo), kind: fixup_loongarch_abs_hi20 + +ori $t1, $t1, %abs_lo12(foo) +# RELOC: R_LARCH_ABS_LO12 foo 0x0 +# INSTR: ori $t1, $t1, %abs_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %abs_lo12(foo), kind: fixup_loongarch_abs_lo12 + +lu32i.d $t1, %abs64_lo20(foo) +# RELOC: R_LARCH_ABS64_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %abs64_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %abs64_lo20(foo), kind: fixup_loongarch_abs64_lo20 + +lu52i.d $t1, $t1, %abs64_hi12(foo) +# RELOC: R_LARCH_ABS64_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %abs64_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %abs64_hi12(foo), kind: fixup_loongarch_abs64_hi12 + +pcalau12i $t1, %pc_hi20(foo) +# RELOC: R_LARCH_PCALA_HI20 foo 0x0 +# INSTR: pcalau12i $t1, %pc_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %pc_hi20(foo), kind: FK_NONE + +pcalau12i $t1, %pc_hi20(foo+4) +# RELOC: R_LARCH_PCALA_HI20 foo 0x4 +# INSTR: pcalau12i $t1, %pc_hi20(foo+4) +# FIXUP: fixup A - offset: 0, value: %pc_hi20(foo+4), kind: FK_NONE + +addi.d $t1, $t1, %pc_lo12(foo) +# RELOC: R_LARCH_PCALA_LO12 foo 0x0 +# INSTR: addi.d $t1, $t1, %pc_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %pc_lo12(foo), kind: FK_NONE + +addi.d $t1, $t1, %pc_lo12(foo+4) +# 
RELOC: R_LARCH_PCALA_LO12 foo 0x4 +# INSTR: addi.d $t1, $t1, %pc_lo12(foo+4) +# FIXUP: fixup A - offset: 0, value: %pc_lo12(foo+4), kind: FK_NONE + +jirl $zero, $t1, %pc_lo12(foo) +# RELOC: R_LARCH_PCALA_LO12 foo 0x0 +# INSTR: jirl $zero, $t1, %pc_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %pc_lo12(foo), kind: FK_NONE + +st.b $t1, $a2, %pc_lo12(foo) +# RELOC: R_LARCH_PCALA_LO12 foo 0x0 +# INSTR: st.b $t1, $a2, %pc_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %pc_lo12(foo), kind: FK_NONE + +st.b $t1, $a2, %pc_lo12(foo+4) +# RELOC: R_LARCH_PCALA_LO12 foo 0x4 +# INSTR: st.b $t1, $a2, %pc_lo12(foo+4) +# FIXUP: fixup A - offset: 0, value: %pc_lo12(foo+4), kind: FK_NONE + +lu32i.d $t1, %pc64_lo20(foo) +# RELOC: R_LARCH_PCALA64_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %pc64_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %pc64_lo20(foo), kind: FK_NONE + +lu52i.d $t1, $t1, %pc64_hi12(foo) +# RELOC: R_LARCH_PCALA64_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %pc64_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %pc64_hi12(foo), kind: FK_NONE + +pcalau12i $t1, %got_pc_hi20(foo) +# RELOC: R_LARCH_GOT_PC_HI20 foo 0x0 +# INSTR: pcalau12i $t1, %got_pc_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %got_pc_hi20(foo), kind: FK_NONE + +ld.d $t1, $a2, %got_pc_lo12(foo) +# RELOC: R_LARCH_GOT_PC_LO12 foo 0x0 +# INSTR: ld.d $t1, $a2, %got_pc_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %got_pc_lo12(foo), kind: FK_NONE + +lu32i.d $t1, %got64_pc_lo20(foo) +# RELOC: R_LARCH_GOT64_PC_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %got64_pc_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %got64_pc_lo20(foo), kind: FK_NONE + +lu52i.d $t1, $t1, %got64_pc_hi12(foo) +# RELOC: R_LARCH_GOT64_PC_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %got64_pc_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %got64_pc_hi12(foo), kind: FK_NONE + +lu12i.w $t1, %got_hi20(foo) +# RELOC: R_LARCH_GOT_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %got_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %got_hi20(foo), kind: FK_NONE + +ori 
$t1, $a2, %got_lo12(foo) +# RELOC: R_LARCH_GOT_LO12 foo 0x0 +# INSTR: ori $t1, $a2, %got_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %got_lo12(foo), kind: FK_NONE + +lu32i.d $t1, %got64_lo20(foo) +# RELOC: R_LARCH_GOT64_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %got64_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %got64_lo20(foo), kind: FK_NONE + +lu52i.d $t1, $t1, %got64_hi12(foo) +# RELOC: R_LARCH_GOT64_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %got64_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %got64_hi12(foo), kind: FK_NONE + +lu12i.w $t1, %le_hi20(foo) +# RELOC: R_LARCH_TLS_LE_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %le_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %le_hi20(foo), kind: fixup_loongarch_tls_le_hi20 + +ori $t1, $a2, %le_lo12(foo) +# RELOC: R_LARCH_TLS_LE_LO12 foo 0x0 +# INSTR: ori $t1, $a2, %le_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %le_lo12(foo), kind: fixup_loongarch_tls_le_lo12 + +lu32i.d $t1, %le64_lo20(foo) +# RELOC: R_LARCH_TLS_LE64_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %le64_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %le64_lo20(foo), kind: fixup_loongarch_tls_le64_lo20 + +lu52i.d $t1, $t1, %le64_hi12(foo) +# RELOC: R_LARCH_TLS_LE64_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %le64_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %le64_hi12(foo), kind: fixup_loongarch_tls_le64_hi12 + +pcalau12i $t1, %ie_pc_hi20(foo) +# RELOC: R_LARCH_TLS_IE_PC_HI20 foo 0x0 +# INSTR: pcalau12i $t1, %ie_pc_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %ie_pc_hi20(foo), kind: FK_NONE + +ld.d $t1, $a2, %ie_pc_lo12(foo) +# RELOC: R_LARCH_TLS_IE_PC_LO12 foo 0x0 +# INSTR: ld.d $t1, $a2, %ie_pc_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %ie_pc_lo12(foo), kind: FK_NONE + +lu32i.d $t1, %ie64_pc_lo20(foo) +# RELOC: R_LARCH_TLS_IE64_PC_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %ie64_pc_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %ie64_pc_lo20(foo), kind: FK_NONE + +lu52i.d $t1, $t1, %ie64_pc_hi12(foo) +# RELOC: R_LARCH_TLS_IE64_PC_HI12 foo 0x0 +# INSTR: 
lu52i.d $t1, $t1, %ie64_pc_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %ie64_pc_hi12(foo), kind: FK_NONE + +lu12i.w $t1, %ie_hi20(foo) +# RELOC: R_LARCH_TLS_IE_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %ie_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %ie_hi20(foo), kind: FK_NONE + +ori $t1, $a2, %ie_lo12(foo) +# RELOC: R_LARCH_TLS_IE_LO12 foo 0x0 +# INSTR: ori $t1, $a2, %ie_lo12(foo) +# FIXUP: fixup A - offset: 0, value: %ie_lo12(foo), kind: FK_NONE + +lu32i.d $t1, %ie64_lo20(foo) +# RELOC: R_LARCH_TLS_IE64_LO20 foo 0x0 +# INSTR: lu32i.d $t1, %ie64_lo20(foo) +# FIXUP: fixup A - offset: 0, value: %ie64_lo20(foo), kind: FK_NONE + +lu52i.d $t1, $t1, %ie64_hi12(foo) +# RELOC: R_LARCH_TLS_IE64_HI12 foo 0x0 +# INSTR: lu52i.d $t1, $t1, %ie64_hi12(foo) +# FIXUP: fixup A - offset: 0, value: %ie64_hi12(foo), kind: FK_NONE + +pcalau12i $t1, %ld_pc_hi20(foo) +# RELOC: R_LARCH_TLS_LD_PC_HI20 foo 0x0 +# INSTR: pcalau12i $t1, %ld_pc_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %ld_pc_hi20(foo), kind: FK_NONE + +lu12i.w $t1, %ld_hi20(foo) +# RELOC: R_LARCH_TLS_LD_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %ld_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %ld_hi20(foo), kind: FK_NONE + +pcalau12i $t1, %gd_pc_hi20(foo) +# RELOC: R_LARCH_TLS_GD_PC_HI20 foo 0x0 +# INSTR: pcalau12i $t1, %gd_pc_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %gd_pc_hi20(foo), kind: FK_NONE + +lu12i.w $t1, %gd_hi20(foo) +# RELOC: R_LARCH_TLS_GD_HI20 foo 0x0 +# INSTR: lu12i.w $t1, %gd_hi20(foo) +# FIXUP: fixup A - offset: 0, value: %gd_hi20(foo), kind: FK_NONE diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s new file mode 100644 index 0000000000000000000000000000000000000000..0179e1027af8f2dc9dbbe66172976293c9200e7f --- /dev/null +++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t +# RUN: llvm-readobj -r %t | FileCheck %s + +## Check that subtraction expressions emit 
R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. + +## TODO: 1- or 2-byte data relocations are not supported for now. + +# CHECK: Relocations [ +# CHECK-NEXT: Section ({{.*}}) .rela.data { +# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 +# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 +# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 +# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 +# CHECK-NEXT: } + +.section sx,"a" +x: +nop + +.data +.8byte x-. +.8byte y-. +.4byte x-. +.4byte y-. + +.section sy,"a" +y: +nop diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll new file mode 100644 index 0000000000000000000000000000000000000000..43fdd25e257b82fde4006557890eee664511a1ce --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --mtriple=loongarch64 --atomic-expand --mattr=+d %s | FileCheck %s + +define float @atomicrmw_fadd_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fadd_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP5]] +; + %res 
= atomicrmw fadd ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fsub_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fsub_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP5]] +; + %res = atomicrmw fsub ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmin_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmin_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 
[[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmax_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmax_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, float %value seq_cst + ret float %res +} + +define double @atomicrmw_fadd_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fadd_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, 
align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %res = atomicrmw fadd ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fsub_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fsub_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %res = atomicrmw fsub ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmin_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmin_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: 
[[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmax_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmax_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, double %value seq_cst + ret double %res +} diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll index 4acf9761421ae702996851b70b99e82f0565b5cd..b0875669bc3a2127dfe3b0d9001b0d3cfa9d9b77 
100644 --- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll +++ b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll @@ -98,8 +98,7 @@ define void @store_release_i32(ptr %ptr, i32 signext %v) { ; LA32-NEXT: ret void ; ; LA64-LABEL: @store_release_i32( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4 +; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] release, align 4 ; LA64-NEXT: ret void ; store atomic i32 %v, ptr %ptr release, align 4 @@ -112,8 +111,7 @@ define void @store_release_i64(ptr %ptr, i64 %v) { ; LA32-NEXT: ret void ; ; LA64-LABEL: @store_release_i64( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8 +; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] release, align 8 ; LA64-NEXT: ret void ; store atomic i64 %v, ptr %ptr release, align 8 diff --git a/llvm/test/Verifier/LoongArch/intrinsic-immarg.ll b/llvm/test/Verifier/LoongArch/intrinsic-immarg.ll new file mode 100644 index 0000000000000000000000000000000000000000..488f77ff55ed4d100488096e572bfdc86e966986 --- /dev/null +++ b/llvm/test/Verifier/LoongArch/intrinsic-immarg.ll @@ -0,0 +1,20 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare ptr @llvm.frameaddress(i32 immarg) +declare ptr @llvm.returnaddress(i32 immarg) + +define ptr @non_const_depth_frameaddress(i32 %x) nounwind { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %x + ; CHECK-NEXT: %1 = call ptr @llvm.frameaddress.p0(i32 %x) + %1 = call ptr @llvm.frameaddress(i32 %x) + ret ptr %1 +} + +define ptr @non_const_depth_returnaddress(i32 %x) nounwind { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %x + ; CHECK-NEXT: %1 = call ptr @llvm.returnaddress(i32 %x) + %1 = call ptr @llvm.returnaddress(i32 %x) + ret ptr %1 +} diff --git a/llvm/test/Verifier/LoongArch/lit.local.cfg b/llvm/test/Verifier/LoongArch/lit.local.cfg new file 
mode 100644 index 0000000000000000000000000000000000000000..2b5a4893e686ffc57c1e1d73ea80bc64ed1514b6 --- /dev/null +++ b/llvm/test/Verifier/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected index 692941b506b8227bc92c222045814058204fe36c..060a6b397457b7b9a304ebf16483a5287cd67538 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected @@ -7,7 +7,7 @@ define hidden i32 @"_Z54bar$ompvariant$bar"() { ; CHECK-LABEL: _Z54bar$ompvariant$bar: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret entry: ret i32 2 } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected index 5d091d7352b7965c7345227b474a4df6e25b9e20..e5bdc8b010e4da8ffe4a185ebb603a6640a6f121 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected @@ -74,25 +74,17 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .cfi_def_cfa 22, 0 ; CHECK-NEXT: st.w $zero, $fp, -16 ; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: bnez $zero, .LBB0_2 -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: beqz $zero, .LBB0_3 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ori $a0, $zero, 1 ; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_3: 
; CHECK-NEXT: ld.w $a0, $fp, -16 -; CHECK-NEXT: bne $a0, $zero, .LBB0_5 -; CHECK-NEXT: b .LBB0_4 -; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: beqz $a0, .LBB0_4 +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: ori $a0, $zero, 1 ; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 -; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: ori $a0, $zero, 2 ; CHECK-NEXT: st.w $a0, $fp, -20 ; CHECK-NEXT: ori $a0, $zero, 1 @@ -101,7 +93,8 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ori $a0, $zero, 4 ; CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: ld.w $a0, $fp, -16 +; CHECK-NEXT: bnez $a0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: ori $a0, $zero, 2 ; CHECK-NEXT: st.w $a0, $fp, -20 @@ -111,7 +104,12 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ori $a0, $zero, 4 ; CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 32 +; CHECK-NEXT: ret ; ; CHECK-LABEL: main: ; CHECK: # %bb.0: @@ -123,8 +121,8 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .cfi_offset 22, -8 ; CHECK-NEXT: addi.w $fp, $sp, 32 ; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: pcalau12i $a0, x -; CHECK-NEXT: addi.w $a0, $a0, x +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(x) +; CHECK-NEXT: addi.w $a0, $a0, %pc_lo12(x) ; CHECK-NEXT: ori $a1, $zero, 1 ; CHECK-NEXT: st.w $a1, $a0, 0 ; CHECK-NEXT: st.w $zero, $fp, -12 @@ -145,4 +143,4 @@ attributes #0 = { noredzone nounwind 
ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload ; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload ; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected index d4edfe5e0854246ac7986c9e3caddfbe7df2d0c3..20e34cdf3c64c51fbf2079f61d66894c16a78fe0 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected @@ -15,25 +15,17 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: .cfi_def_cfa 22, 0 ; CHECK-NEXT: st.w $zero, $fp, -16 ; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: bnez $zero, .LBB0_2 -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: beqz $zero, .LBB0_3 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ori $a0, $zero, 1 ; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: ld.w $a0, $fp, -16 -; CHECK-NEXT: bne $a0, $zero, .LBB0_5 -; CHECK-NEXT: b .LBB0_4 -; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: beqz $a0, .LBB0_4 +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: ori $a0, $zero, 1 ; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 -; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: ori $a0, $zero, 2 ; CHECK-NEXT: st.w $a0, $fp, -20 ; CHECK-NEXT: ori $a0, $zero, 1 @@ -42,7 +34,8 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ori $a0, $zero, 4 ; 
CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: ld.w $a0, $fp, -16 +; CHECK-NEXT: bnez $a0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: ori $a0, $zero, 2 ; CHECK-NEXT: st.w $a0, $fp, -20 @@ -52,7 +45,12 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: st.w $a0, $fp, -24 ; CHECK-NEXT: ori $a0, $zero, 4 ; CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 32 +; CHECK-NEXT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -100,8 +98,8 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: .cfi_offset 22, -8 ; CHECK-NEXT: addi.w $fp, $sp, 32 ; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: pcalau12i $a0, x -; CHECK-NEXT: addi.w $a0, $a0, x +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(x) +; CHECK-NEXT: addi.w $a0, $a0, %pc_lo12(x) ; CHECK-NEXT: ori $a1, $zero, 1 ; CHECK-NEXT: st.w $a1, $a0, 0 ; CHECK-NEXT: st.w $zero, $fp, -12 @@ -122,7 +120,7 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload ; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload ; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test index 78fc1435518cf4a452434e915e173e5c215f8b01..fc5856691f8dca099b39883c78bc75e0a834d9dc 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test @@ -42,6 +42,12 @@ # RUN: llvm-objcopy -I binary -O elf32-hexagon %t.txt %t.hexagon.o # RUN: llvm-readobj --file-headers %t.hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32 +# RUN: llvm-objcopy -I binary 
-O elf32-loongarch %t.txt %t.la32.o +# RUN: llvm-readobj --file-headers %t.la32.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32 + +# RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o +# RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64 + # CHECK: Format: # 32-SAME: elf32- # 64-SAME: elf64- @@ -49,6 +55,8 @@ # ARM-SAME: littlearm # HEXAGON-SAME: hexagon # I386-SAME: i386 +# LA32-SAME: loongarch{{$}} +# LA64-SAME: loongarch{{$}} # MIPS-SAME: mips{{$}} # RISCV32-SAME: riscv{{$}} # RISCV64-SAME: riscv{{$}} @@ -62,6 +70,8 @@ # ARM-NEXT: Arch: arm # HEXAGON-NEXT: Arch: hexagon # I386-NEXT: Arch: i386 +# LA32-NEXT: Arch: loongarch32 +# LA64-NEXT: Arch: loongarch64 # MIPS-NEXT: Arch: mips{{$}} # PPC32BE-NEXT: Arch: powerpc{{$}} # PPC32LE-NEXT: Arch: powerpcle{{$}} @@ -97,6 +107,8 @@ # ARM-NEXT: Machine: EM_ARM (0x28) # HEXAGON-NEXT: Machine: EM_HEXAGON (0xA4) # I386-NEXT: Machine: EM_386 (0x3) +# LA32-NEXT: Machine: EM_LOONGARCH (0x102) +# LA64-NEXT: Machine: EM_LOONGARCH (0x102) # MIPS-NEXT: Machine: EM_MIPS (0x8) # PPC32-NEXT: Machine: EM_PPC (0x14) # PPC64-NEXT: Machine: EM_PPC64 (0x15) diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test index 98f1b3c644d8ba0721b11492d387e8440dcf0abe..882940c05e19c2d49dca5a766b732de07392e4d5 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test @@ -109,6 +109,14 @@ # RUN: llvm-readobj --file-headers %t.elf32_hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV # RUN: llvm-readobj --file-headers %t.elf32_hexagon.dwo | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV +# RUN: llvm-objcopy %t.o -O elf32-loongarch %t.elf32_loongarch.o --split-dwo=%t.elf32_loongarch.dwo +# RUN: llvm-readobj --file-headers %t.elf32_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV +# RUN: llvm-readobj 
--file-headers %t.elf32_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV + +# RUN: llvm-objcopy %t.o -O elf64-loongarch %t.elf64_loongarch.o --split-dwo=%t.elf64_loongarch.dwo +# RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV +# RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV + !ELF FileHeader: Class: ELFCLASS32 @@ -144,6 +152,8 @@ Symbols: # AARCH-SAME: aarch64 # ARM-SAME: littlearm # HEXAGON-SAME: hexagon +# LA32-SAME: loongarch{{$}} +# LA64-SAME: loongarch{{$}} # MIPS-SAME: mips # PPCBE-SAME: powerpc{{$}} # PPCLE-SAME: powerpcle{{$}} @@ -158,6 +168,8 @@ Symbols: # AARCH-NEXT: Arch: aarch64 # ARM-NEXT: Arch: arm # HEXAGON-NEXT: Arch: hexagon +# LA32-NEXT: Arch: loongarch32 +# LA64-NEXT: Arch: loongarch64 # MIPSBE-NEXT: Arch: mips{{$}} # MIPSLE-NEXT: Arch: mipsel{{$}} # MIPS64BE-NEXT: Arch: mips64{{$}} @@ -190,6 +202,8 @@ Symbols: # HEXAGON: Machine: EM_HEXAGON (0xA4) # I386: Machine: EM_386 (0x3) # IAMCU: Machine: EM_IAMCU (0x6) +# LA32: Machine: EM_LOONGARCH (0x102) +# LA64: Machine: EM_LOONGARCH (0x102) # MIPS: Machine: EM_MIPS (0x8) # PPC32: Machine: EM_PPC (0x14) # PPC64: Machine: EM_PPC64 (0x15) diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg index 197150e220e86bb52739cb75020cd1f7ac1cde3d..0ca12783a2eb90b268fb2cb17d46520f1005b31c 100644 --- a/llvm/test/tools/llvm-profgen/lit.local.cfg +++ b/llvm/test/tools/llvm-profgen/lit.local.cfg @@ -3,5 +3,5 @@ import lit.util config.suffixes = ['.test', '.ll', '.s', '.yaml'] -if not 'X86' in config.root.targets: +if not ('X86' in config.root.targets and 'LoongArch' in config.root.targets): config.unsupported = True diff --git a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test b/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test index b6627364054b66afcd2fc1908f1e0b56c0836de0..824dcb2c05af291f2bc61ee4248f3e8b60a5a36a 
100644 --- a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test +++ b/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test @@ -1,59 +1,95 @@ -## Check llvm-readobj is able to decode all possible LoongArch e_flags field values. +## Check llvm-readobj's ability to decode all possible LoongArch e_flags field +## values. -# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S -# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefix=READOBJ-LP64S %s -# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefix=READELF-LP64S --match-full-lines %s +## Not all combinations covered here exist in reality (such as the v0 ILP32* +## objects) but they are included nevertheless for completeness. -# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F -# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefix=READOBJ-LP64F %s -# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefix=READELF-LP64F --match-full-lines %s +# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SOFT-V0 %s +# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefixes=READELF-LP64,READELF-SOFT-V0 --match-full-lines %s -# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D -# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefix=READOBJ-LP64D %s -# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefix=READELF-LP64D --match-full-lines %s +# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SINGLE-V0 %s +# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefixes=READELF-LP64,READELF-SINGLE-V0 --match-full-lines %s -# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S -# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefix=READOBJ-ILP32S %s -# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefix=READELF-ILP32S --match-full-lines %s +# RUN: yaml2obj %s -o %t-lp64d 
-DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-DOUBLE-V0 %s +# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefixes=READELF-LP64,READELF-DOUBLE-V0 --match-full-lines %s -# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F -# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefix=READOBJ-ILP32F %s -# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefix=READELF-ILP32F --match-full-lines %s +# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SOFT-V0 %s +# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefixes=READELF-ILP32,READELF-SOFT-V0 --match-full-lines %s -# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D -# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefix=READOBJ-ILP32D %s -# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefix=READELF-ILP32D --match-full-lines %s +# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SINGLE-V0 %s +# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefixes=READELF-ILP32,READELF-SINGLE-V0 --match-full-lines %s -# READOBJ-LP64S: Flags [ (0x1) -# READOBJ-LP64S-NEXT: EF_LOONGARCH_BASE_ABI_LP64S (0x1) -# READOBJ-LP64S-NEXT: ] +# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 +# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-DOUBLE-V0 %s +# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefixes=READELF-ILP32,READELF-DOUBLE-V0 --match-full-lines %s -# READOBJ-LP64F: Flags [ (0x2) -# READOBJ-LP64F-NEXT: EF_LOONGARCH_BASE_ABI_LP64F (0x2) -# READOBJ-LP64F-NEXT: ] +# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-lp64s | FileCheck 
--check-prefixes=READOBJ-LP64,READOBJ-SOFT-V1 %s +# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefixes=READELF-LP64,READELF-SOFT-V1 --match-full-lines %s -# READOBJ-LP64D: Flags [ (0x3) -# READOBJ-LP64D-NEXT: EF_LOONGARCH_BASE_ABI_LP64D (0x3) -# READOBJ-LP64D-NEXT: ] +# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-SINGLE-V1 %s +# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefixes=READELF-LP64,READELF-SINGLE-V1 --match-full-lines %s -# READOBJ-ILP32S: Flags [ (0x5) -# READOBJ-ILP32S-NEXT: EF_LOONGARCH_BASE_ABI_ILP32S (0x5) -# READOBJ-ILP32S-NEXT: ] +# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefixes=READOBJ-LP64,READOBJ-DOUBLE-V1 %s +# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefixes=READELF-LP64,READELF-DOUBLE-V1 --match-full-lines %s -# READOBJ-ILP32F: Flags [ (0x6) -# READOBJ-ILP32F-NEXT: EF_LOONGARCH_BASE_ABI_ILP32F (0x6) -# READOBJ-ILP32F-NEXT: ] +# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SOFT-V1 %s +# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefixes=READELF-ILP32,READELF-SOFT-V1 --match-full-lines %s -# READOBJ-ILP32D: Flags [ (0x7) -# READOBJ-ILP32D-NEXT: EF_LOONGARCH_BASE_ABI_ILP32D (0x7) -# READOBJ-ILP32D-NEXT: ] +# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-SINGLE-V1 %s +# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefixes=READELF-ILP32,READELF-SINGLE-V1 --match-full-lines %s -# READELF-LP64S: Flags: 0x1, LP64, SOFT-FLOAT -# READELF-LP64F: Flags: 0x2, LP64, SINGLE-FLOAT -# READELF-LP64D: Flags: 0x3, LP64, DOUBLE-FLOAT -# READELF-ILP32S: Flags: 0x5, ILP32, SOFT-FLOAT -# 
READELF-ILP32F: Flags: 0x6, ILP32, SINGLE-FLOAT -# READELF-ILP32D: Flags: 0x7, ILP32, DOUBLE-FLOAT +# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 +# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefixes=READOBJ-ILP32,READOBJ-DOUBLE-V1 %s +# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefixes=READELF-ILP32,READELF-DOUBLE-V1 --match-full-lines %s + +# READOBJ-LP64: Class: 64-bit (0x2) +# READELF-LP64: Class: ELF64 +# READOBJ-ILP32: Class: 32-bit (0x1) +# READELF-ILP32: Class: ELF32 + +# READOBJ-SOFT-V0: Flags [ (0x1) +# READOBJ-SOFT-V0-NEXT: EF_LOONGARCH_ABI_SOFT_FLOAT (0x1) +# READOBJ-SOFT-V0-NEXT: ] + +# READOBJ-SINGLE-V0: Flags [ (0x2) +# READOBJ-SINGLE-V0-NEXT: EF_LOONGARCH_ABI_SINGLE_FLOAT (0x2) +# READOBJ-SINGLE-V0-NEXT: ] + +# READOBJ-DOUBLE-V0: Flags [ (0x3) +# READOBJ-DOUBLE-V0-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) +# READOBJ-DOUBLE-V0-NEXT: ] + +# READOBJ-SOFT-V1: Flags [ (0x41) +# READOBJ-SOFT-V1-NEXT: EF_LOONGARCH_ABI_SOFT_FLOAT (0x1) +# READOBJ-SOFT-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) +# READOBJ-SOFT-V1-NEXT: ] + +# READOBJ-SINGLE-V1: Flags [ (0x42) +# READOBJ-SINGLE-V1-NEXT: EF_LOONGARCH_ABI_SINGLE_FLOAT (0x2) +# READOBJ-SINGLE-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) +# READOBJ-SINGLE-V1-NEXT: ] + +# READOBJ-DOUBLE-V1: Flags [ (0x43) +# READOBJ-DOUBLE-V1-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) +# READOBJ-DOUBLE-V1-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) +# READOBJ-DOUBLE-V1-NEXT: ] + +# READELF-SOFT-V0: Flags: 0x1, SOFT-FLOAT +# READELF-SINGLE-V0: Flags: 0x2, SINGLE-FLOAT +# READELF-DOUBLE-V0: Flags: 0x3, DOUBLE-FLOAT +# READELF-SOFT-V1: Flags: 0x41, SOFT-FLOAT, OBJ-v1 +# READELF-SINGLE-V1: Flags: 0x42, SINGLE-FLOAT, OBJ-v1 +# READELF-DOUBLE-V1: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 --- !ELF FileHeader: @@ -61,4 +97,7 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_LOONGARCH - Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] + Flags: [ + EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT, + 
EF_LOONGARCH_OBJABI_V[[OBJABI_VER]], + ] diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test index 0aed3fff4413c64201b6c53bd7db60db8750a658..e32dc893fa7985d41986aab7c874c25117a87323 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test @@ -56,6 +56,52 @@ # CHECK: Type: R_LARCH_SUB64 (56) # CHECK: Type: R_LARCH_GNU_VTINHERIT (57) # CHECK: Type: R_LARCH_GNU_VTENTRY (58) +# CHECK: Type: R_LARCH_B16 (64) +# CHECK: Type: R_LARCH_B21 (65) +# CHECK: Type: R_LARCH_B26 (66) +# CHECK: Type: R_LARCH_ABS_HI20 (67) +# CHECK: Type: R_LARCH_ABS_LO12 (68) +# CHECK: Type: R_LARCH_ABS64_LO20 (69) +# CHECK: Type: R_LARCH_ABS64_HI12 (70) +# CHECK: Type: R_LARCH_PCALA_HI20 (71) +# CHECK: Type: R_LARCH_PCALA_LO12 (72) +# CHECK: Type: R_LARCH_PCALA64_LO20 (73) +# CHECK: Type: R_LARCH_PCALA64_HI12 (74) +# CHECK: Type: R_LARCH_GOT_PC_HI20 (75) +# CHECK: Type: R_LARCH_GOT_PC_LO12 (76) +# CHECK: Type: R_LARCH_GOT64_PC_LO20 (77) +# CHECK: Type: R_LARCH_GOT64_PC_HI12 (78) +# CHECK: Type: R_LARCH_GOT_HI20 (79) +# CHECK: Type: R_LARCH_GOT_LO12 (80) +# CHECK: Type: R_LARCH_GOT64_LO20 (81) +# CHECK: Type: R_LARCH_GOT64_HI12 (82) +# CHECK: Type: R_LARCH_TLS_LE_HI20 (83) +# CHECK: Type: R_LARCH_TLS_LE_LO12 (84) +# CHECK: Type: R_LARCH_TLS_LE64_LO20 (85) +# CHECK: Type: R_LARCH_TLS_LE64_HI12 (86) +# CHECK: Type: R_LARCH_TLS_IE_PC_HI20 (87) +# CHECK: Type: R_LARCH_TLS_IE_PC_LO12 (88) +# CHECK: Type: R_LARCH_TLS_IE64_PC_LO20 (89) +# CHECK: Type: R_LARCH_TLS_IE64_PC_HI12 (90) +# CHECK: Type: R_LARCH_TLS_IE_HI20 (91) +# CHECK: Type: R_LARCH_TLS_IE_LO12 (92) +# CHECK: Type: R_LARCH_TLS_IE64_LO20 (93) +# CHECK: Type: R_LARCH_TLS_IE64_HI12 (94) +# CHECK: Type: R_LARCH_TLS_LD_PC_HI20 (95) +# CHECK: Type: R_LARCH_TLS_LD_HI20 (96) +# CHECK: Type: R_LARCH_TLS_GD_PC_HI20 (97) +# CHECK: Type: R_LARCH_TLS_GD_HI20 (98) +# CHECK: Type: 
R_LARCH_32_PCREL (99) +# CHECK: Type: R_LARCH_RELAX (100) +# CHECK: Type: R_LARCH_DELETE (101) +# CHECK: Type: R_LARCH_ALIGN (102) +# CHECK: Type: R_LARCH_PCREL20_S2 (103) +# CHECK: Type: R_LARCH_CFA (104) +# CHECK: Type: R_LARCH_ADD6 (105) +# CHECK: Type: R_LARCH_SUB6 (106) +# CHECK: Type: R_LARCH_ADD_ULEB128 (107) +# CHECK: Type: R_LARCH_SUB_ULEB128 (108) +# CHECK: Type: R_LARCH_64_PCREL (109) --- !ELF FileHeader: @@ -119,3 +165,49 @@ Sections: - Type: R_LARCH_SUB64 - Type: R_LARCH_GNU_VTINHERIT - Type: R_LARCH_GNU_VTENTRY + - Type: R_LARCH_B16 + - Type: R_LARCH_B21 + - Type: R_LARCH_B26 + - Type: R_LARCH_ABS_HI20 + - Type: R_LARCH_ABS_LO12 + - Type: R_LARCH_ABS64_LO20 + - Type: R_LARCH_ABS64_HI12 + - Type: R_LARCH_PCALA_HI20 + - Type: R_LARCH_PCALA_LO12 + - Type: R_LARCH_PCALA64_LO20 + - Type: R_LARCH_PCALA64_HI12 + - Type: R_LARCH_GOT_PC_HI20 + - Type: R_LARCH_GOT_PC_LO12 + - Type: R_LARCH_GOT64_PC_LO20 + - Type: R_LARCH_GOT64_PC_HI12 + - Type: R_LARCH_GOT_HI20 + - Type: R_LARCH_GOT_LO12 + - Type: R_LARCH_GOT64_LO20 + - Type: R_LARCH_GOT64_HI12 + - Type: R_LARCH_TLS_LE_HI20 + - Type: R_LARCH_TLS_LE_LO12 + - Type: R_LARCH_TLS_LE64_LO20 + - Type: R_LARCH_TLS_LE64_HI12 + - Type: R_LARCH_TLS_IE_PC_HI20 + - Type: R_LARCH_TLS_IE_PC_LO12 + - Type: R_LARCH_TLS_IE64_PC_LO20 + - Type: R_LARCH_TLS_IE64_PC_HI12 + - Type: R_LARCH_TLS_IE_HI20 + - Type: R_LARCH_TLS_IE_LO12 + - Type: R_LARCH_TLS_IE64_LO20 + - Type: R_LARCH_TLS_IE64_HI12 + - Type: R_LARCH_TLS_LD_PC_HI20 + - Type: R_LARCH_TLS_LD_HI20 + - Type: R_LARCH_TLS_GD_PC_HI20 + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX + - Type: R_LARCH_DELETE + - Type: R_LARCH_ALIGN + - Type: R_LARCH_PCREL20_S2 + - Type: R_LARCH_CFA + - Type: R_LARCH_ADD6 + - Type: R_LARCH_SUB6 + - Type: R_LARCH_ADD_ULEB128 + - Type: R_LARCH_SUB_ULEB128 + - Type: R_LARCH_64_PCREL diff --git a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml b/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml index 
e4c4c292e0da518da88decf7a97bc7b5cfb1d2a1..2e4ee1dabb1863f6725ec3baaac41abadcd997f1 100644 --- a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml +++ b/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml @@ -1,24 +1,43 @@ ## Check obj2yaml is able to decode all possible LoongArch e_flags field values. -# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S -# RUN: obj2yaml %t-lp64s | FileCheck -DFLAG=LP64S %s +# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 +# RUN: obj2yaml %t-lp64s | FileCheck -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 %s -# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F -# RUN: obj2yaml %t-lp64f | FileCheck -DFLAG=LP64F %s +# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 +# RUN: obj2yaml %t-lp64f | FileCheck -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 %s -# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D -# RUN: obj2yaml %t-lp64d | FileCheck -DFLAG=LP64D %s +# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 +# RUN: obj2yaml %t-lp64d | FileCheck -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 %s -# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S -# RUN: obj2yaml %t-ilp32s | FileCheck -DFLAG=ILP32S %s +# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 +# RUN: obj2yaml %t-ilp32s | FileCheck -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=0 %s -# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F -# RUN: obj2yaml %t-ilp32f | FileCheck -DFLAG=ILP32F %s +# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 +# RUN: obj2yaml %t-ilp32f | FileCheck -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=0 %s -# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D -# RUN: obj2yaml %t-ilp32d | FileCheck -DFLAG=ILP32D %s +# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 +# RUN: obj2yaml %t-ilp32d | FileCheck -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=0 %s -# CHECK: 
Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] +# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 +# RUN: obj2yaml %t-lp64s | FileCheck -DCLASS=64 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 %s + +# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 +# RUN: obj2yaml %t-lp64f | FileCheck -DCLASS=64 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 %s + +# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 +# RUN: obj2yaml %t-lp64d | FileCheck -DCLASS=64 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 %s + +# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 +# RUN: obj2yaml %t-ilp32s | FileCheck -DCLASS=32 -DABI_MODIFIER=SOFT -DOBJABI_VER=1 %s + +# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 +# RUN: obj2yaml %t-ilp32f | FileCheck -DCLASS=32 -DABI_MODIFIER=SINGLE -DOBJABI_VER=1 %s + +# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 +# RUN: obj2yaml %t-ilp32d | FileCheck -DCLASS=32 -DABI_MODIFIER=DOUBLE -DOBJABI_VER=1 %s + +# CHECK: Class: ELFCLASS[[CLASS]] +# CHECK: Flags: [ EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT, EF_LOONGARCH_OBJABI_V[[OBJABI_VER]] ] --- !ELF FileHeader: @@ -26,4 +45,7 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_LOONGARCH - Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] + Flags: [ + EF_LOONGARCH_ABI_[[ABI_MODIFIER]]_FLOAT, + EF_LOONGARCH_OBJABI_V[[OBJABI_VER]], + ] diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index cfacaa0aa030d65724aac20fbd2341a86db8ff95..2a5a18f04bea19c1824995b806a73cdc3da3fdbc 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -315,7 +315,11 @@ static const StringMap TargetMap{ // SPARC {"elf32-sparc", {ELF::EM_SPARC, false, false}}, {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, + // Hexagon {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, + // LoongArch + 
{"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, + {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, }; static Expected diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index eef5b8eb8a0faf26fb533f0d79509d468dd08de9..ceac763078e107f46bdeb7603f22cedd597e9d28 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -196,9 +196,6 @@ void ProfiledBinary::load() { exitWithError("not a valid Elf image", Path); TheTriple = Obj->makeTriple(); - // Current only support X86 - if (!TheTriple.isX86()) - exitWithError("unsupported target", TheTriple.getTriple()); LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); // Find the preferred load address for text sections. diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index c9a239f785d2ebceeb33208d8eddf52032dae619..bf8f9600f232c8e5318598dcfe2bb7eec00b3e3d 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1651,12 +1651,11 @@ const EnumEntry ElfHeaderAVRFlags[] = { }; const EnumEntry ElfHeaderLoongArchFlags[] = { - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32S, "ILP32, SOFT-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32F, "ILP32, SINGLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32D, "ILP32, DOUBLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64S, "LP64, SOFT-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64F, "LP64, SINGLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64D, "LP64, DOUBLE-FLOAT"), + ENUM_ENT(EF_LOONGARCH_ABI_SOFT_FLOAT, "SOFT-FLOAT"), + ENUM_ENT(EF_LOONGARCH_ABI_SINGLE_FLOAT, "SINGLE-FLOAT"), + ENUM_ENT(EF_LOONGARCH_ABI_DOUBLE_FLOAT, "DOUBLE-FLOAT"), + ENUM_ENT(EF_LOONGARCH_OBJABI_V0, "OBJ-v0"), + ENUM_ENT(EF_LOONGARCH_OBJABI_V1, "OBJ-v1"), }; @@ -3370,7 +3369,8 @@ template void GNUELFDumper::printFileHeaders() { unsigned(ELF::EF_AVR_ARCH_MASK)); else if (e.e_machine == EM_LOONGARCH) ElfFlags = printFlags(e.e_flags, 
makeArrayRef(ElfHeaderLoongArchFlags), - unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + unsigned(ELF::EF_LOONGARCH_ABI_MODIFIER_MASK), + unsigned(ELF::EF_LOONGARCH_OBJABI_MASK)); Str = "0x" + utohexstr(e.e_flags); if (!ElfFlags.empty()) Str = Str + ", " + ElfFlags; @@ -6523,7 +6523,8 @@ template void LLVMELFDumper::printFileHeaders() { unsigned(ELF::EF_AVR_ARCH_MASK)); else if (E.e_machine == EM_LOONGARCH) W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), - unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + unsigned(ELF::EF_LOONGARCH_ABI_MODIFIER_MASK), + unsigned(ELF::EF_LOONGARCH_OBJABI_MASK)); else W.printFlags("Flags", E.e_flags); W.printNumber("HeaderSize", E.e_ehsize); diff --git a/llvm/unittests/ADT/TripleTest.cpp b/llvm/unittests/ADT/TripleTest.cpp index 3b6a582c42fd13dd059c3f32230756c0bad58ff6..9b7aaf8478df89a995076c4508307f76e1840131 100644 --- a/llvm/unittests/ADT/TripleTest.cpp +++ b/llvm/unittests/ADT/TripleTest.cpp @@ -439,12 +439,72 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::UnknownOS, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("loongarch32-unknown-linux-gnu"); + EXPECT_EQ(Triple::loongarch32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNU, T.getEnvironment()); + + T = Triple("loongarch32-unknown-linux-gnuf32"); + EXPECT_EQ(Triple::loongarch32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNUF32, T.getEnvironment()); + + T = Triple("loongarch32-unknown-linux-gnuf64"); + EXPECT_EQ(Triple::loongarch32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNUF64, T.getEnvironment()); + + T = Triple("loongarch32-unknown-linux-gnusf"); + EXPECT_EQ(Triple::loongarch32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, 
T.getOS()); + EXPECT_EQ(Triple::GNUSF, T.getEnvironment()); + + T = Triple("loongarch32-unknown-linux-musl"); + EXPECT_EQ(Triple::loongarch32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::Musl, T.getEnvironment()); + T = Triple("loongarch64-unknown-linux"); EXPECT_EQ(Triple::loongarch64, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); EXPECT_EQ(Triple::Linux, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("loongarch64-unknown-linux-gnu"); + EXPECT_EQ(Triple::loongarch64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNU, T.getEnvironment()); + + T = Triple("loongarch64-unknown-linux-gnuf32"); + EXPECT_EQ(Triple::loongarch64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNUF32, T.getEnvironment()); + + T = Triple("loongarch64-unknown-linux-gnuf64"); + EXPECT_EQ(Triple::loongarch64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNUF64, T.getEnvironment()); + + T = Triple("loongarch64-unknown-linux-gnusf"); + EXPECT_EQ(Triple::loongarch64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::GNUSF, T.getEnvironment()); + + T = Triple("loongarch64-unknown-linux-musl"); + EXPECT_EQ(Triple::loongarch64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::Musl, T.getEnvironment()); + T = Triple("riscv32-unknown-unknown"); EXPECT_EQ(Triple::riscv32, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); @@ -1139,12 +1199,14 @@ TEST(TripleTest, BitWidthPredicates) { EXPECT_TRUE(T.isArch32Bit()); EXPECT_FALSE(T.isArch64Bit()); EXPECT_TRUE(T.isLoongArch()); + 
EXPECT_TRUE(T.isLoongArch32()); T.setArch(Triple::loongarch64); EXPECT_FALSE(T.isArch16Bit()); EXPECT_FALSE(T.isArch32Bit()); EXPECT_TRUE(T.isArch64Bit()); EXPECT_TRUE(T.isLoongArch()); + EXPECT_TRUE(T.isLoongArch64()); T.setArch(Triple::dxil); EXPECT_FALSE(T.isArch16Bit()); diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp index 83e7363f3425d63e02258794554df8c01662e85c..35fc2ec698fbd7093109200479d7734a7fc781ff 100644 --- a/llvm/unittests/Object/ELFTest.cpp +++ b/llvm/unittests/Object/ELFTest.cpp @@ -159,10 +159,103 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GNU_VTINHERIT)); EXPECT_EQ("R_LARCH_GNU_VTENTRY", getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GNU_VTENTRY)); + EXPECT_EQ("R_LARCH_B16", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_B16)); + EXPECT_EQ("R_LARCH_B21", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_B21)); + EXPECT_EQ("R_LARCH_B26", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_B26)); + EXPECT_EQ("R_LARCH_ABS_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ABS_HI20)); + EXPECT_EQ("R_LARCH_ABS_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ABS_LO12)); + EXPECT_EQ("R_LARCH_ABS64_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ABS64_LO20)); + EXPECT_EQ("R_LARCH_ABS64_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ABS64_HI12)); + EXPECT_EQ("R_LARCH_PCALA_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCALA_HI20)); + EXPECT_EQ("R_LARCH_PCALA_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCALA_LO12)); + EXPECT_EQ("R_LARCH_PCALA64_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCALA64_LO20)); + EXPECT_EQ("R_LARCH_PCALA64_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCALA64_HI12)); + EXPECT_EQ("R_LARCH_GOT_PC_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_PC_HI20)); + EXPECT_EQ("R_LARCH_GOT_PC_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_PC_LO12)); + 
EXPECT_EQ("R_LARCH_GOT64_PC_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT64_PC_LO20)); + EXPECT_EQ("R_LARCH_GOT64_PC_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT64_PC_HI12)); + EXPECT_EQ("R_LARCH_GOT_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_HI20)); + EXPECT_EQ("R_LARCH_GOT_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_LO12)); + EXPECT_EQ("R_LARCH_GOT64_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT64_LO20)); + EXPECT_EQ("R_LARCH_GOT64_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT64_HI12)); + EXPECT_EQ("R_LARCH_TLS_LE_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LE_HI20)); + EXPECT_EQ("R_LARCH_TLS_LE_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LE_LO12)); + EXPECT_EQ("R_LARCH_TLS_LE64_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LE64_LO20)); + EXPECT_EQ("R_LARCH_TLS_LE64_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LE64_HI12)); + EXPECT_EQ("R_LARCH_TLS_IE_PC_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_PC_HI20)); + EXPECT_EQ("R_LARCH_TLS_IE_PC_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_PC_LO12)); + EXPECT_EQ("R_LARCH_TLS_IE64_PC_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE64_PC_LO20)); + EXPECT_EQ("R_LARCH_TLS_IE64_PC_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE64_PC_HI12)); + EXPECT_EQ("R_LARCH_TLS_IE_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_HI20)); + EXPECT_EQ("R_LARCH_TLS_IE_LO12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_LO12)); + EXPECT_EQ("R_LARCH_TLS_IE64_LO20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE64_LO20)); + EXPECT_EQ("R_LARCH_TLS_IE64_HI12", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE64_HI12)); + EXPECT_EQ("R_LARCH_TLS_LD_PC_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LD_PC_HI20)); + EXPECT_EQ("R_LARCH_TLS_LD_HI20", + 
getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LD_HI20)); + EXPECT_EQ("R_LARCH_TLS_GD_PC_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_GD_PC_HI20)); + EXPECT_EQ("R_LARCH_TLS_GD_HI20", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_GD_HI20)); + EXPECT_EQ("R_LARCH_32_PCREL", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); + EXPECT_EQ("R_LARCH_RELAX", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); + EXPECT_EQ("R_LARCH_DELETE", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); + EXPECT_EQ("R_LARCH_ALIGN", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); + EXPECT_EQ("R_LARCH_PCREL20_S2", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); + EXPECT_EQ("R_LARCH_CFA", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); + EXPECT_EQ("R_LARCH_ADD6", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); + EXPECT_EQ("R_LARCH_SUB6", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); + EXPECT_EQ("R_LARCH_ADD_ULEB128", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); + EXPECT_EQ("R_LARCH_SUB_ULEB128", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); + EXPECT_EQ("R_LARCH_64_PCREL", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); } TEST(ELFTest, getELFRelativeRelocationType) { EXPECT_EQ(ELF::R_VE_RELATIVE, getELFRelativeRelocationType(EM_VE)); + EXPECT_EQ(ELF::R_LARCH_RELATIVE, getELFRelativeRelocationType(EM_LOONGARCH)); } // This is a test for the DataRegion helper struct, defined in ELF.h header. 
diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..04c0efdc927acf3d38be98c410f5474a481b1ec5 --- /dev/null +++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt @@ -0,0 +1,23 @@ +include_directories( + ${LLVM_MAIN_SRC_DIR}/lib/Target/LoongArch + ${LLVM_BINARY_DIR}/lib/Target/LoongArch + ) + +set(LLVM_LINK_COMPONENTS + LoongArchCodeGen + LoongArchDesc + LoongArchInfo + CodeGen + Core + MC + MIRParser + SelectionDAG + Support + Target + ) + +add_llvm_target_unittest(LoongArchTests + InstSizes.cpp + ) + +set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a3adb926006976d607f10bebab9c9b0a976277a5 --- /dev/null +++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp @@ -0,0 +1,141 @@ +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include + +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +std::unique_ptr createTargetMachine() { + auto TT(Triple::normalize("loongarch64--")); + std::string CPU("generic-la64"); + std::string FS("+64bit"); + + LLVMInitializeLoongArchTargetInfo(); + LLVMInitializeLoongArchTarget(); + LLVMInitializeLoongArchTargetMC(); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); + + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default))); +} + +std::unique_ptr createInstrInfo(TargetMachine *TM) { + LoongArchSubtarget ST(TM->getTargetTriple(), 
std::string(TM->getTargetCPU()), + std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), "lp64d", + *TM); + return std::make_unique(ST); +} + +/// The \p InputIRSnippet is only needed for things that can't be expressed in +/// the \p InputMIRSnippet (global variables etc) +/// Inspired by AArch64 +void runChecks( + LLVMTargetMachine *TM, LoongArchInstrInfo *II, + const StringRef InputIRSnippet, const StringRef InputMIRSnippet, + std::function Checks) { + LLVMContext Context; + + auto MIRString = "--- |\n" + " declare void @sizes()\n" + + InputIRSnippet.str() + + "...\n" + "---\n" + "name: sizes\n" + "jumpTable:\n" + " kind: block-address\n" + " entries:\n" + " - id: 0\n" + " blocks: [ '%bb.0' ]\n" + "body: |\n" + " bb.0:\n" + + InputMIRSnippet.str(); + + std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRString); + std::unique_ptr MParser = + createMIRParser(std::move(MBuffer), Context); + ASSERT_TRUE(MParser); + + std::unique_ptr M = MParser->parseIRModule(); + ASSERT_TRUE(M); + + M->setTargetTriple(TM->getTargetTriple().getTriple()); + M->setDataLayout(TM->createDataLayout()); + + MachineModuleInfo MMI(TM); + bool Res = MParser->parseMachineFunctions(*M, MMI); + ASSERT_FALSE(Res); + + auto F = M->getFunction("sizes"); + ASSERT_TRUE(F != nullptr); + auto &MF = MMI.getOrCreateMachineFunction(*F); + + Checks(*II, MF); +} + +} // anonymous namespace + +TEST(InstSizes, INLINEASM_BR) { + std::unique_ptr TM = createTargetMachine(); + std::unique_ptr II = createInstrInfo(TM.get()); + + runChecks(TM.get(), II.get(), "", + // clang-format off + " INLINEASM_BR &nop, 1 /* sideeffect attdialect */, 13 /* imm */, %jump-table.0\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(4u, II.getInstSizeInBytes(*I)); + }); +} + +TEST(InstSizes, SPACE) { + std::unique_ptr TM = createTargetMachine(); + std::unique_ptr II = createInstrInfo(TM.get()); + + runChecks(TM.get(), II.get(), "", " 
INLINEASM &\".space 1024\", 1\n", + [](LoongArchInstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(1024u, II.getInstSizeInBytes(*I)); + }); +} + +TEST(InstSizes, AtomicPseudo) { + std::unique_ptr TM = createTargetMachine(); + std::unique_ptr II = createInstrInfo(TM.get()); + + runChecks( + TM.get(), II.get(), "", + // clang-format off + " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8, 4\n" + " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" + " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(36u, II.getInstSizeInBytes(*I)); + ++I; + EXPECT_EQ(24u, II.getInstSizeInBytes(*I)); + ++I; + EXPECT_EQ(48u, II.getInstSizeInBytes(*I)); + ++I; + EXPECT_EQ(56u, II.getInstSizeInBytes(*I)); + ++I; + EXPECT_EQ(36u, II.getInstSizeInBytes(*I)); + ++I; + EXPECT_EQ(44u, II.getInstSizeInBytes(*I)); + }); +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn new file mode 100644 
index 0000000000000000000000000000000000000000..2d31092645916cfc07aa318ef0e1d19f53e5ba2f --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn @@ -0,0 +1,24 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenAsmMatcher") { + visibility = [ + ":AsmParser", + "//llvm/lib/Target/LoongArch:LLVMLoongArchCodeGen", + ] + args = [ "-gen-asm-matcher" ] + td_file = "../LoongArch.td" +} + +static_library("AsmParser") { + output_name = "LLVMLoongArchAsmParser" + deps = [ + ":LoongArchGenAsmMatcher", + "//llvm/lib/MC", + "//llvm/lib/MC/MCParser", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." ] + sources = [ "LoongArchAsmParser.cpp" ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..cf7fe990e4615759d5889cfe0af8718eb698252d --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn @@ -0,0 +1,65 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenDAGISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-dag-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenMCPseudoLowering") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-pseudo-lowering" ] + td_file = "LoongArch.td" +} + + +static_library("LLVMLoongArchCodeGen") { + deps = [ + ":LoongArchGenDAGISel", + ":LoongArchGenMCPseudoLowering", + + # See https://reviews.llvm.org/D137532 + "AsmParser:LoongArchGenAsmMatcher", + "MCTargetDesc", + "TargetInfo", + "//llvm/include/llvm/Config:llvm-config", + "//llvm/lib/Analysis", + "//llvm/lib/CodeGen", + "//llvm/lib/CodeGen/AsmPrinter", + "//llvm/lib/CodeGen/SelectionDAG", + "//llvm/lib/IR", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target", + ] + include_dirs = [ "." 
] + sources = [ + "LoongArchAsmPrinter.cpp", + "LoongArchExpandAtomicPseudoInsts.cpp", + "LoongArchExpandPseudoInsts.cpp", + "LoongArchFrameLowering.cpp", + "LoongArchInstrInfo.cpp", + "LoongArchISelDAGToDAG.cpp", + "LoongArchISelLowering.cpp", + "LoongArchMCInstLower.cpp", + "LoongArchRegisterInfo.cpp", + "LoongArchSubtarget.cpp", + "LoongArchTargetMachine.cpp", + ] +} + +# This is a bit different from most build files: Due to this group +# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this +# target, which pulls in the code in this directory *and all subdirectories*. +# For most other directories, "//llvm/lib/Foo" only pulls in the code directly +# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this +# different behavior. +group("LoongArch") { + deps = [ + ":LLVMLoongArchCodeGen", + "AsmParser", + "Disassembler", + "MCTargetDesc", + "TargetInfo", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..0eac793d40279f27b4a26d295de5b6abdcf7f1c9 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn @@ -0,0 +1,20 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenDisassemblerTables") { + visibility = [ ":Disassembler" ] + args = [ "-gen-disassembler" ] + td_file = "../LoongArch.td" +} + +static_library("Disassembler") { + output_name = "LLVMLoongArchDisassembler" + deps = [ + ":LoongArchGenDisassemblerTables", + "//llvm/lib/MC/MCDisassembler", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." 
] + sources = [ "LoongArchDisassembler.cpp" ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..424502bccd9dfbcacb216c5b297ce329fe413252 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn @@ -0,0 +1,65 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenAsmWriter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-asm-writer" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenInstrInfo") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-instr-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenMCCodeEmitter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-emitter" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenRegisterInfo") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-register-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenSubtargetInfo") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-subtarget" ] + td_file = "../LoongArch.td" +} + +static_library("MCTargetDesc") { + output_name = "LLVMLoongArchDesc" + + # This should contain tablegen targets generating .inc files included + # by other targets. .inc files only used by .cpp files in this directory + # should be in deps instead. + public_deps = [ + ":LoongArchGenInstrInfo", + ":LoongArchGenRegisterInfo", + ":LoongArchGenSubtargetInfo", + ] + deps = [ + ":LoongArchGenAsmWriter", + ":LoongArchGenMCCodeEmitter", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." 
] + sources = [ + "LoongArchAsmBackend.cpp", + "LoongArchBaseInfo.cpp", + "LoongArchELFObjectWriter.cpp", + "LoongArchELFStreamer.cpp", + "LoongArchInstPrinter.cpp", + "LoongArchMCAsmInfo.cpp", + "LoongArchMCCodeEmitter.cpp", + "LoongArchMCExpr.cpp", + "LoongArchMCTargetDesc.cpp", + "LoongArchMatInt.cpp", + "LoongArchTargetStreamer.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..9512d414d89638b482a4757489143cabc467d33d --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn @@ -0,0 +1,6 @@ +static_library("TargetInfo") { + output_name = "LLVMLoongArchInfo" + deps = [ "//llvm/lib/Support" ] + include_dirs = [ ".." ] + sources = [ "LoongArchTargetInfo.cpp" ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/targets.gni b/llvm/utils/gn/secondary/llvm/lib/Target/targets.gni index f5048f2824a91def10faddfb2b7cb992692bd539..14c5fdc6dd4bed04ef9915f7046abf4bb436d887 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/targets.gni +++ b/llvm/utils/gn/secondary/llvm/lib/Target/targets.gni @@ -16,6 +16,7 @@ llvm_all_targets = [ "BPF", "Hexagon", "Lanai", + "LoongArch", "Mips", "NVPTX", "PowerPC", @@ -52,6 +53,7 @@ llvm_build_AArch64 = false llvm_build_AMDGPU = false llvm_build_ARM = false llvm_build_BPF = false +llvm_build_LoongArch = false llvm_build_Mips = false llvm_build_PowerPC = false llvm_build_SystemZ = false @@ -70,6 +72,8 @@ foreach(target, llvm_targets_to_build) { llvm_build_ARM = true } else if (target == "BPF") { llvm_build_BPF = true + } else if (target == "LoongArch") { + llvm_build_LoongArch = true } else if (target == "Mips") { llvm_build_Mips = true } else if (target == "PowerPC") { diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 
49e6e957ec1c9c1ded21635fd4b871d83b065e9d..f35e3e3b0d21275c879b6c60507529b21930dfa0 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -78,6 +78,9 @@ group("unittests") { "tools/llvm-exegesis/ARM:LLVMExegesisARMTests", ] } + if (llvm_build_LoongArch) { + deps += [ "Target/LoongArch:LoongArchTests" ] + } if (llvm_build_Mips) { deps += [ "tools/llvm-exegesis/Mips:LLVMExegesisMipsTests" ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/Target/LoongArch/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Target/LoongArch/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..794b9f8b113dd6f79db7d9505e5f7d65b22431b8 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/Target/LoongArch/BUILD.gn @@ -0,0 +1,14 @@ +import("//third-party/unittest/unittest.gni") + +unittest("LoongArchTests") { + deps = [ + "//llvm/lib/CodeGen/MIRParser", + "//llvm/lib/Support", + "//llvm/lib/Target", + "//llvm/lib/Target/LoongArch:LLVMLoongArchCodeGen", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ "//llvm/lib/Target/LoongArch" ] + sources = [ "InstSizes.cpp" ] +} diff --git a/openmp/README.rst b/openmp/README.rst index ffa49e4d2a49d1b92bdf3122a009bb8e24838e35..eb4bb04cd3da77d0f72ef578ac70d920981e4da2 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -137,7 +137,7 @@ Options for all Libraries Options for ``libomp`` ---------------------- -**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` +**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` The default value for this option is chosen based on probing the compiler for architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). @@ -194,7 +194,7 @@ Optional Features **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` Include support for the OpenMP Tools Interface (OMPT). 
This option is supported and ``ON`` by default for x86, x86_64, AArch64, - PPC64 and RISCV64 on Linux* and macOS*. + PPC64, RISCV64 and LoongArch64 on Linux* and macOS*. This option is ``OFF`` if this feature is not supported for the platform. **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index c9dbcb2ab22c0cf8dc1bbef416a0887c9a555e4c..ce65a07f185fef58a4daec92e26b3a8776db60f0 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake libomp_get_architecture(LIBOMP_DETECTED_ARCH) set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING - "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64).") + "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") # Should assertions be enabled? They are on by default. 
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") @@ -61,6 +61,8 @@ else() # Part of LLVM build set(LIBOMP_ARCH arm) elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") set(LIBOMP_ARCH riscv64) + elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") + set(LIBOMP_ARCH loongarch64) else() # last ditch effort libomp_get_architecture(LIBOMP_ARCH) @@ -81,7 +83,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -159,6 +161,7 @@ set(MIC FALSE) set(MIPS64 FALSE) set(MIPS FALSE) set(RISCV64 FALSE) +set(LOONGARCH64 FALSE) if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture set(IA32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture @@ -181,8 +184,10 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture set(MIPS TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture set(MIPS64 TRUE) - elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture +elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture + set(LOONGARCH64 TRUE) endif() # Set some flags based on build_type diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt index 874a5634e93e24807fa87eb6bd4b42b1118e51a9..ddd8b0e4282d8acc051d94a0c85f5972839925cf 100644 --- a/openmp/runtime/README.txt +++ b/openmp/runtime/README.txt @@ -54,6 +54,7 @@ Architectures Supported * IBM(R) Power architecture (little endian) * MIPS and MIPS64 architecture * RISCV64 architecture +* LoongArch64 architecture Supported RTL Build 
Configurations ================================== diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index dd60a2d347b171f255f20367941afdfb16218c94..c164c04c6165dee4e1778b7a1b5b389bc1f0df60 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -47,6 +47,8 @@ function(libomp_get_architecture return_arch) #error ARCHITECTURE=mips #elif defined(__riscv) && __riscv_xlen == 64 #error ARCHITECTURE=riscv64 + #elif defined(__loongarch__) && __loongarch_grlen == 64 + #error ARCHITECTURE=loongarch64 #else #error ARCHITECTURE=UnknownArchitecture #endif diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index 1ca3412edc8e0051b22571be6f10e96f2d904e1f..88deb461dbaf3a2d3c6377e9a49e39f65e1adc3c 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -214,6 +214,9 @@ else() elseif(${RISCV64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) + elseif(${LOONGARCH64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) endif() libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake index b1de242372b88beb10d88ef6837cb646b833e80f..b5ffc97fca3d217a63091a552154f93215940db1 100644 --- a/openmp/runtime/cmake/LibompUtils.cmake +++ b/openmp/runtime/cmake/LibompUtils.cmake @@ -109,6 +109,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "MIPS64" PARENT_SCOPE) elseif(${RISCV64}) set(${return_arch_string} "RISCV64" PARENT_SCOPE) + elseif(${LOONGARCH64}) + set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE) else() 
set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index 775c58f98484118e62181e7ff1b8c8ef584ccaea..356366e4f9e797b76d4c628df77481ebef6c3810 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -321,7 +321,8 @@ else() (LIBOMP_ARCH STREQUAL aarch64_a64fx) OR (LIBOMP_ARCH STREQUAL ppc64le) OR (LIBOMP_ARCH STREQUAL ppc64) OR - (LIBOMP_ARCH STREQUAL riscv64)) + (LIBOMP_ARCH STREQUAL riscv64) OR + (LIBOMP_ARCH STREQUAL loongarch64)) AND # OS supported? ((WIN32 AND LIBOMP_HAVE_PSAPI) OR APPLE OR (NOT WIN32 AND LIBOMP_HAVE_WEAK_ATTRIBUTE))) set(LIBOMP_HAVE_OMPT_SUPPORT TRUE) diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 817b7ae881005ae45cb5714771f7345fbcbd05e4..26839929dd6ab24eda0205b58d081de1df697f71 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -2931,6 +2931,17 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, } (*line)++; +#if KMP_ARCH_LOONGARCH64 + // The parsing logic of /proc/cpuinfo in this function highly depends on + // the blank lines between each processor info block. But on LoongArch a + // blank line exists before the first processor info block (i.e. after the + // "system type" line). This blank line was added because the "system + // type" line is unrelated to any of the CPUs. We must skip this line so + // that the original logic works on LoongArch. 
+ if (*buf == '\n' && *line == 2) + continue; +#endif + char s1[] = "processor"; if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { CHECK_LINE; diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index ce00362f04cabbd28a10afdccbf5f6a7691ed8d0..06fd323a52a39c6370a9db633905320a277524bc 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -254,6 +254,18 @@ public: #elif __NR_sched_getaffinity != 5196 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_LOONGARCH64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 122 +#elif __NR_sched_setaffinity != 122 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 123 +#elif __NR_sched_getaffinity != 123 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#else #error Unknown or unsupported architecture #endif /* KMP_ARCH_* */ #elif KMP_OS_FREEBSD diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index c932d450c84e9c9c90fcb040b9da2eb102e07de0..ba25d2e169bb216a203bf3bc3f84ab0e5d6ae4d7 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -700,7 +700,7 @@ void __kmpc_flush(ident_t *loc) { } #endif // KMP_MIC #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) // Nothing to see here move along #elif KMP_ARCH_PPC64 // Nothing needed here (we have a real MB above). 
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 02efaa1b2613fb6f30b00c5a45015d611c880163..f1d24962acb921f644b89b6fcf0f4f7aafe3436c 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -178,7 +178,7 @@ typedef unsigned long long kmp_uint64; #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier." @@ -1044,7 +1044,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index bbbd72dd6951fb2bc7bbe22268610f6bfdfaefd0..830d00d7e0ddff4fd2a90d5a421162feab773629 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -92,6 +92,7 @@ #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 +#define KMP_ARCH_LOONGARCH64 0 #if KMP_OS_WINDOWS #if defined(_M_AMD64) || defined(__x86_64) @@ -135,6 +136,9 @@ #elif defined __riscv && __riscv_xlen == 64 #undef KMP_ARCH_RISCV64 #define KMP_ARCH_RISCV64 1 +#elif defined __loongarch__ && __loongarch_grlen == 64 +#undef KMP_ARCH_LOONGARCH64 +#define KMP_ARCH_LOONGARCH64 1 #endif #endif @@ -199,7 +203,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 + 
KMP_ARCH_LOONGARCH64) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index b8d470528798db8fd2a55520b5ce237dae10fd4b..3895ece65adc66b89c6abb2d16cf66e6af92b5c1 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8751,7 +8751,7 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index b4a45c1ac6f5b833904a490d06a2c00ca67bbb3f..b3767c9490bfaa5ee85fd322551b140a295f8a73 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -1725,6 +1725,164 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_RISCV64 */ +#if KMP_ARCH_LOONGARCH64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// a0: pkfn +// a1: gtid +// a2: tid +// a3: argc +// a4: p_argv +// a5: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp registers: +// +// t0: used to calculate the dynamic stack size / used to hold pkfn address +// t1: used as temporary for stack placement calculation +// t2:
used as temporary for stack arguments +// t3: used as temporary for number of remaining pkfn parms +// t4: used to traverse p_argv array +// +// return: a0 (always 1/TRUE) +// + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 2 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save ra and fp + addi.d $sp, $sp, -16 + st.d $ra, $sp, 8 + st.d $fp, $sp, 0 + addi.d $fp, $sp, 16 + .cfi_def_cfa 22, 0 + .cfi_offset 1, -8 + .cfi_offset 22, -16 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 8 of such registers (a[0-7]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 6)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 6)*8 + 8 + + addi.d $t0, $a3, -6 + slt $t1, $t0, $zero + masknez $t0, $t0, $t1 + addi.d $t0, $t0, 1 + slli.d $t0, $t0, 3 + sub.d $sp, $sp, $t0 + + // Align the stack to 16 bytes + bstrins.d $sp, $zero, 3, 0 + + move $t0, $a0 + move $t3, $a3 + move $t4, $a4 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + st.d $fp, $a5, 0 +#endif + + // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) + + st.w $a1, $fp, -20 + st.w $a2, $fp, -24 + + addi.d $a0, $fp, -20 + addi.d $a1, $fp, -24 + + beqz $t3, .L_kmp_3 + ld.d $a2, $t4, 0 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a3, $t4, 8 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a4, $t4, 16 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a5, $t4, 24 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a6, $t4, 32 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a7, $t4, 40 + + // Prepare any additional argument passed through the stack + addi.d $t4, $t4, 48 + move $t1, $sp + b .L_kmp_2
+.L_kmp_1: + ld.d $t2, $t4, 0 + st.d $t2, $t1, 0 + addi.d $t4, $t4, 8 + addi.d $t1, $t1, 8 +.L_kmp_2: + addi.d $t3, $t3, -1 + bnez $t3, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + jirl $ra, $t0, 0 + + // Restore stack and return + + addi.d $a0, $zero, 1 + + addi.d $sp, $fp, -16 + ld.d $fp, $sp, 0 + ld.d $ra, $sp, 8 + addi.d $sp, $sp, 16 + jr $ra +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_LOONGARCH64 */ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data .comm .gomp_critical_user_,32,8 @@ -1736,7 +1894,7 @@ __kmp_unnamed_critical_addr: .size __kmp_unnamed_critical_addr,4 #endif /* KMP_ARCH_ARM */ -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif @@ -1751,7 +1909,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 */ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ #if KMP_OS_LINUX # if KMP_ARCH_ARM diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 91edf0254a772d42cc41357f0e6d7db0b091ace4..2a0154ee8a6675fd6ea225ed621ae6700895fe52 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2447,7 +2447,7 @@ finish: // Clean up and exit. 
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ - KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) + KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index 8d3ec164b7ee14043a1282c582b0cb0b3aa53b5c..dc228e449725f9db7f57e072d54c09c7bba58988 100644 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -207,6 +207,15 @@ ompt_label_##id: printf("%" PRIu64 ": current_address=%p or %p\n", \ ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12) #endif +#elif KMP_ARCH_LOONGARCH64 +// On LoongArch64 the NOP instruction is 4 bytes long, can be followed by +// inserted jump instruction (another 4 bytes long). And an additional jump +// instruction may appear (adding 4 more bytes) when the NOP is referenced +// elsewhere (ie. another branch). +#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p or %p or %p\n", \ + ompt_get_thread_data()->value, ((char *)addr) - 4, \ + ((char *)addr) - 8, ((char *)addr) - 12) #else #error Unsupported target architecture, cannot determine address offset! 
#endif diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h index 3f9a77d8d23f43036003c92fcb2d9ff4462a63f9..402ecf3ed553c93f794d0792ac53c8ceef19a7cf 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/common.h +++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm index 38593a154d03f5c867aa5285788aecb37f6eaa45..d62d450e9e5dcf565ebf24a858fc39e7a194f798 100644 --- a/openmp/runtime/tools/lib/Platform.pm +++ b/openmp/runtime/tools/lib/Platform.pm @@ -63,6 +63,8 @@ sub canon_arch($) { $arch = "mips"; } elsif ( $arch =~ m{\Ariscv64} ) { $arch = "riscv64"; + } elsif ( $arch =~ m{\Aloongarch64} ) { + $arch = "loongarch64"; } else { $arch = undef; }; # if @@ -93,6 +95,7 @@ sub canon_mic_arch($) { "32e" => "Intel(R) 64", "arm" => "ARM", "aarch64" => "AArch64", + "loongarch64" => "LoongArch64", "mic" => "Intel(R) Many Integrated Core Architecture", "mips" => "MIPS", "mips64" => "MIPS64", @@ -225,6 +228,8 @@ sub target_options() { $_host_arch = "mips"; } elsif ( $hardware_platform eq "riscv64" ) { $_host_arch = "riscv64"; + } elsif ( $hardware_platform eq "loongarch64" ) { + $_host_arch = "loongarch64"; } else { die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; }; # if @@ -414,7 +419,7 @@ the script assumes host architecture is target one. Input string is an architecture name to canonize. The function recognizes many variants, for example: C<32e>, C, C, etc. Returned string is a canonized architecture name, -one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C or C is input string is not recognized. +one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. 
=item B diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm index 99fe1cdbf00cc692fd1504107c85e1fe64a22458..8a976addcff03e0dc41e037440dd48b8565f0964 100644 --- a/openmp/runtime/tools/lib/Uname.pm +++ b/openmp/runtime/tools/lib/Uname.pm @@ -158,6 +158,8 @@ if ( 0 ) { $values{ hardware_platform } = "mips"; } elsif ( $values{ machine } =~ m{\Ariscv64\z} ) { $values{ hardware_platform } = "riscv64"; + } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { + $values{ hardware_platform } = "loongarch64"; } else { die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; }; # if diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl index 6fca2060491fc718cd2290defd9b3091eed0a571..0a731a897ee85ff9a2ed5711416c907d58833d22 100644 --- a/utils/bazel/configure.bzl +++ b/utils/bazel/configure.bzl @@ -19,6 +19,7 @@ DEFAULT_TARGETS = [ "BPF", "Hexagon", "Lanai", + "LoongArch", "Mips", "MSP430", "NVPTX", diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 2a1efb2a0840ef50b229db5dbcf315148c016c99..9c470513ac0f86a298935807f2d96876bef92b1d 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -591,6 +591,10 @@ llvm_target_intrinsics_list = [ "name": "Hexagon", "intrinsic_prefix": "hexagon", }, + { + "name": "LoongArch", + "intrinsic_prefix": "loongarch", + }, { "name": "Mips", "intrinsic_prefix": "mips", @@ -1628,6 +1632,21 @@ llvm_target_lib_list = [lib for lib in [ ("-gen-subtarget", "lib/Target/Lanai/LanaiGenSubtargetInfo.inc"), ], }, + { + "name": "LoongArch", + "short_name": "LoongArch", + "tbl_outs": [ + ("-gen-asm-matcher", "lib/Target/LoongArch/LoongArchGenAsmMatcher.inc"), + ("-gen-asm-writer", "lib/Target/LoongArch/LoongArchGenAsmWriter.inc"), + ("-gen-dag-isel", "lib/Target/LoongArch/LoongArchGenDAGISel.inc"), + ("-gen-disassembler", 
"lib/Target/LoongArch/LoongArchGenDisassemblerTables.inc"), + ("-gen-emitter", "lib/Target/LoongArch/LoongArchGenMCCodeEmitter.inc"), + ("-gen-instr-info", "lib/Target/LoongArch/LoongArchGenInstrInfo.inc"), + ("-gen-pseudo-lowering", "lib/Target/LoongArch/LoongArchGenMCPseudoLowering.inc"), + ("-gen-register-info", "lib/Target/LoongArch/LoongArchGenRegisterInfo.inc"), + ("-gen-subtarget", "lib/Target/LoongArch/LoongArchGenSubtargetInfo.inc"), + ], + }, { "name": "Mips", "short_name": "Mips",