diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def index cbb239223aae3b22e8ef15ee4627c60825aeea39..a5a07c167908ce4b4fd2f49536507225e5b31e0b 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArchBase.def +++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def @@ -51,3 +51,8 @@ TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_frecipe_s, "ff", "nc", "f,frecipe") +TARGET_BUILTIN(__builtin_loongarch_frecipe_d, "dd", "nc", "d,frecipe") +TARGET_BUILTIN(__builtin_loongarch_frsqrte_s, "ff", "nc", "f,frecipe") +TARGET_BUILTIN(__builtin_loongarch_frsqrte_d, "dd", "nc", "d,frecipe") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def index 3de200f665b680afdebc08a57c79a844a0783998..4cf51cc000f6fcbf5f20f11627bceb1110b1c8bb 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def @@ -657,9 +657,15 @@ TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrecipe_s, "V8fV8f", "nc", "lasx,frecipe") +TARGET_BUILTIN(__builtin_lasx_xvfrecipe_d, "V4dV4d", "nc", "lasx,frecipe") + TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_s, "V8fV8f", "nc", "lasx,frecipe") +TARGET_BUILTIN(__builtin_lasx_xvfrsqrte_d, "V4dV4d", "nc", "lasx,frecipe") + TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") 
TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def index 8e6aec886c50cd912d5993809cd9bf26b1f92da6..c90f4dc5458fa6ed73ac1de2e4eb43c768cf5f4e 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def @@ -641,9 +641,15 @@ TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrecipe_s, "V4fV4f", "nc", "lsx,frecipe") +TARGET_BUILTIN(__builtin_lsx_vfrecipe_d, "V2dV2d", "nc", "lsx,frecipe") + TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrsqrte_s, "V4fV4f", "nc", "lsx,frecipe") +TARGET_BUILTIN(__builtin_lsx_vfrsqrte_d, "V2dV2d", "nc", "lsx,frecipe") + TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 6b68bc458b939a1806f918f07ccb4df7b9c6fab3..060f96118364d5cee84c08bbf9f98e53fca149bc 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -757,6 +757,8 @@ def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< "wrong fpu width; LASX depends on 64-bit FPU.">; def err_drv_loongarch_invalid_simd_option_combination : Error< "invalid option combination; LASX depends on LSX.">; +def err_drv_loongarch_invalid_msimd_EQ : Error< + "invalid argument '%0' to -msimd=; must be one of: none, lsx, lasx">; def err_drv_expand_response_file : Error< "failed to expand response file: %0">; diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td index 344c8bd49da77736b86d9d14a380d585412dfdfd..530bb53ea9b5e6f49768721af5e743e21b152a14 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4236,6 +4236,9 @@ def mlasx : Flag<["-"], "mlasx">, Group, HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; def mno_lasx : Flag<["-"], "mno-lasx">, Group, HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; +def msimd_EQ : Joined<["-"], "msimd=">, Group, + Flags<[TargetSpecific]>, + HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">; def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, Flags<[CC1Option]>, Group, MarshallingInfoFlag>; diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index 88537989a05129f553b89ac2e74a444e9472bea0..5fede3d7cdc4ff424818a33504916df0024d544e 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -200,7 +200,24 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, // Define __loongarch_arch. StringRef ArchName = getCPU(); - Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); + if (ArchName == "loongarch64") { + if (HasFeatureLSX) { + // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" + // arch feature set will be used to include all sub-features belonging to + // the V1.1 ISA version. + if (HasFeatureFrecipe) + Builder.defineMacro("__loongarch_arch", + Twine('"') + "la64v1.1" + Twine('"')); + else + Builder.defineMacro("__loongarch_arch", + Twine('"') + "la64v1.0" + Twine('"')); + } else { + Builder.defineMacro("__loongarch_arch", + Twine('"') + ArchName + Twine('"')); + } + } else { + Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); + } // Define __loongarch_tune. 
StringRef TuneCPU = getTargetOpts().TuneCPU; @@ -208,10 +225,16 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, TuneCPU = ArchName; Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); - if (HasFeatureLSX) + if (HasFeatureLASX) { + Builder.defineMacro("__loongarch_simd_width", "256"); Builder.defineMacro("__loongarch_sx", Twine(1)); - if (HasFeatureLASX) Builder.defineMacro("__loongarch_asx", Twine(1)); + } else if (HasFeatureLSX) { + Builder.defineMacro("__loongarch_simd_width", "128"); + Builder.defineMacro("__loongarch_sx", Twine(1)); + } + if (HasFeatureFrecipe) + Builder.defineMacro("__loongarch_frecipe", Twine(1)); StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") @@ -285,6 +308,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( HasFeatureLSX = true; else if (Feature == "+lasx") HasFeatureLASX = true; + else if (Feature == "+frecipe") + HasFeatureFrecipe = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 3313102492cb8dc41b484c650630d5f451992c8c..4d2965f5b3a3b32c90b4771e5585cbda1181c5d2 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -29,6 +29,7 @@ protected: bool HasFeatureF; bool HasFeatureLSX; bool HasFeatureLASX; + bool HasFeatureFrecipe; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) @@ -37,6 +38,7 @@ public: HasFeatureF = false; HasFeatureLSX = false; HasFeatureLASX = false; + HasFeatureFrecipe = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 31153a67ad284033d7a63282d093a849d8ec176a..21106c425206d8776bc232880374a30724ccab90 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -127,6 +127,11 @@ void 
loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { + // Enable the `lsx` feature on 64-bit LoongArch by default. + if (Triple.isLoongArch64() && + (!Args.hasArgNoClaim(clang::driver::options::OPT_march_EQ))) + Features.push_back("+lsx"); + std::string ArchName; if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) ArchName = A->getValue(); @@ -145,9 +150,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, } else if (A->getOption().matches(options::OPT_msingle_float)) { Features.push_back("+f"); Features.push_back("-d"); + Features.push_back("-lsx"); } else /*Soft-float*/ { Features.push_back("-f"); Features.push_back("-d"); + Features.push_back("-lsx"); } } else if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) { StringRef FPU = A->getValue(); @@ -157,9 +164,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, } else if (FPU == "32") { Features.push_back("+f"); Features.push_back("-d"); + Features.push_back("-lsx"); } else if (FPU == "0" || FPU == "none") { Features.push_back("-f"); Features.push_back("-d"); + Features.push_back("-lsx"); } else { D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU; } @@ -175,6 +184,42 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, A->ignoreTargetSpecific(); if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) A->ignoreTargetSpecific(); + if (Arg *A = Args.getLastArgNoClaim(options::OPT_msimd_EQ)) + A->ignoreTargetSpecific(); + + // Select lsx/lasx feature determined by -msimd=. + // Option -msimd= precedes -m[no-]lsx and -m[no-]lasx. + if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) { + StringRef MSIMD = A->getValue(); + if (MSIMD == "lsx") { + // Option -msimd=lsx depends on 64-bit FPU. + // -m*-float and -mfpu=none/0/32 conflict with -msimd=lsx. 
+ if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); + else + Features.push_back("+lsx"); + } else if (MSIMD == "lasx") { + // Option -msimd=lasx depends on 64-bit FPU and LSX. + // -m*-float, -mfpu=none/0/32 and -mno-lsx conflict with -msimd=lasx. + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); + else if (llvm::find(Features, "-lsx") != Features.end()) + D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); + + // The command options do not contain -mno-lasx. + if (!Args.getLastArg(options::OPT_mno_lasx)) { + Features.push_back("+lsx"); + Features.push_back("+lasx"); + } + } else if (MSIMD == "none") { + if (llvm::find(Features, "+lsx") != Features.end()) + Features.push_back("-lsx"); + if (llvm::find(Features, "+lasx") != Features.end()) + Features.push_back("-lasx"); + } else { + D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD; + } + } // Select lsx feature determined by -m[no-]lsx. if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { @@ -198,8 +243,6 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, if (A->getOption().matches(options::OPT_mlasx)) { if (llvm::find(Features, "-d") != Features.end()) D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); - else if (llvm::find(Features, "-lsx") != Features.end()) - D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); else { /*-mlasx*/ Features.push_back("+lsx"); Features.push_back("+lasx"); @@ -225,8 +268,14 @@ std::string loongarch::postProcessTargetCPUString(const std::string &CPU, std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, const llvm::Triple &Triple) { std::string CPU; + std::string Arch; // If we have -march, use that. 
- if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) - CPU = A->getValue(); + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { + Arch = A->getValue(); + if (Arch == "la64v1.0" || Arch == "la64v1.1") + CPU = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); + else + CPU = Arch; + } return postProcessTargetCPUString(CPU, Triple); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fac4f03d6193245aaa75891d78677b4a42e2f934..4e5f689498d69b35aba7e843b73405bf4fab71ee 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5773,18 +5773,38 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { StringRef CM = A->getValue(); - if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" || - CM == "tiny") { - if (Triple.isOSAIX() && CM == "medium") - CmdArgs.push_back("-mcmodel=large"); - else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium")) + if (Triple.isLoongArch()) { + bool Ok = false; + if (CM == "extreme" && + Args.hasFlagNoClaim(options::OPT_fplt, options::OPT_fno_plt, false)) + D.Diag(diag::err_drv_argument_not_allowed_with) + << A->getAsString(Args) << "-fplt"; + Ok = CM == "normal" || CM == "medium" || CM == "extreme"; + // Convert to LLVM recognizable names. 
+ if (Ok) { + CM = llvm::StringSwitch(CM) + .Case("normal", "small") + .Case("extreme", "large") + .Default(CM); + CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM)); + } else { D.Diag(diag::err_drv_invalid_argument_to_option) << CM << A->getOption().getName(); - else - A->render(Args, CmdArgs); + } } else { - D.Diag(diag::err_drv_invalid_argument_to_option) - << CM << A->getOption().getName(); + if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" || + CM == "tiny") { + if (Triple.isOSAIX() && CM == "medium") + CmdArgs.push_back("-mcmodel=large"); + else if (Triple.isAArch64() && (CM == "kernel" || CM == "medium")) + D.Diag(diag::err_drv_invalid_argument_to_option) + << CM << A->getOption().getName(); + else + A->render(Args, CmdArgs); + } else { + D.Diag(diag::err_drv_invalid_argument_to_option) + << CM << A->getOption().getName(); + } } } diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h index 24dd29ce91ffb9a7f7982ad4b3310fc82d1568d2..f4218295919a0d0d633e6a7b2f1bb81f83c96eda 100644 --- a/clang/lib/Headers/larchintrin.h +++ b/clang/lib/Headers/larchintrin.h @@ -228,6 +228,18 @@ extern __inline void ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) #endif +#define __frecipe_s(/*float*/ _1) \ + (float)__builtin_loongarch_frecipe_s((float)_1) + +#define __frecipe_d(/*double*/ _1) \ + (double)__builtin_loongarch_frecipe_d((double)_1) + +#define __frsqrte_s(/*float*/ _1) \ + (float)__builtin_loongarch_frsqrte_s((float)_1) + +#define __frsqrte_d(/*double*/ _1) \ + (double)__builtin_loongarch_frsqrte_d((double)_1) + #ifdef __cplusplus } #endif diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h index 6b4d5012a24b5893024424c6613265fbbe81c830..dafc2a2f3e6a70bd1aa46b2f7998ee3f223fc55c 100644 --- a/clang/lib/Headers/lasxintrin.h +++ b/clang/lib/Headers/lasxintrin.h @@ -1726,6 +1726,18 @@ extern __inline return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); } +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecipe_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecipe_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); +} + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrint_s(__m256 _1) { @@ -1750,6 +1762,18 @@ extern __inline return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); } +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrte_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrte_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); +} + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvflogb_s(__m256 _1) { diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h index a29bc7757ab5680e733561da9700716512885f71..f347955ce6fb51303a8a70f28e229c9a8b00aec7 100644 --- a/clang/lib/Headers/lsxintrin.h +++ b/clang/lib/Headers/lsxintrin.h @@ -1776,6 +1776,18 @@ extern __inline return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); } +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecipe_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecipe_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); +} + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrint_s(__m128 _1) { @@ -1800,6 +1812,18 @@ extern __inline return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); } +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrte_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrte_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); +} + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vflogb_s(__m128 _1) { diff --git a/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c new file mode 100644 index 0000000000000000000000000000000000000000..e5fe684346c00de3ace41b75f58a5732344bc577 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/builtin-dbl-approximate.c @@ -0,0 +1,45 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple loongarch32 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple loongarch64 -target-feature +d -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +#include + +// CHECK-LABEL: @frecipe_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) +// CHECK-NEXT: ret double [[TMP0]] +// +double frecipe_d (double _1) +{ + return __builtin_loongarch_frecipe_d (_1); +} + +// CHECK-LABEL: @frsqrte_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) +// CHECK-NEXT: ret double [[TMP0]] +// +double frsqrte_d (double _1) +{ + return __builtin_loongarch_frsqrte_d (_1); +} + +// CHECK-LABEL: @frecipe_d_alia +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frecipe.d(double [[A:%.*]]) +// CHECK-NEXT: ret double [[TMP0]] +// +double frecipe_d_alia (double _1) +{ + return __frecipe_d (_1); +} + +// CHECK-LABEL: @frsqrte_d_alia +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.loongarch.frsqrte.d(double [[A:%.*]]) +// CHECK-NEXT: ret double [[TMP0]] +// +double frsqrte_d_alia (double _1) +{ + return __frsqrte_d (_1); +} diff --git a/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c new file mode 100644 index 0000000000000000000000000000000000000000..47bb47084364b8baf89d274813820a723b7d298c --- /dev/null +++ b/clang/test/CodeGen/LoongArch/builtin-flt-approximate.c @@ -0,0 +1,45 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple loongarch32 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +#include + +// CHECK-LABEL: @frecipe_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float frecipe_s (float _1) +{ + return __builtin_loongarch_frecipe_s (_1); +} + +// CHECK-LABEL: @frsqrte_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float frsqrte_s (float _1) +{ + return __builtin_loongarch_frsqrte_s (_1); +} + +// CHECK-LABEL: @frecipe_s_alia +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frecipe.s(float [[A:%.*]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float frecipe_s_alia (float _1) +{ + return __frecipe_s (_1); +} + +// CHECK-LABEL: @frsqrte_s_alia +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.loongarch.frsqrte.s(float [[A:%.*]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float frsqrte_s_alia (float _1) +{ + return __frsqrte_s (_1); +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c 
b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c index efb3b94175cfad4e3bab9b5934058daac22a65e9..a3242dfd41e9b852da326e5488b311cdae4ec2fb 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64-error.c @@ -1,7 +1,28 @@ // RUN: %clang_cc1 -triple loongarch64 -emit-llvm -S -verify %s -o /dev/null +// RUN: not %clang_cc1 -triple loongarch64 -DFEATURE_CHECK -emit-llvm %s -o /dev/null 2>&1 \ +// RUN: | FileCheck %s #include +#ifdef FEATURE_CHECK +void test_feature(unsigned long *v_ul, int *v_i, float a, double b) { +// CHECK: error: '__builtin_loongarch_cacop_w' needs target feature 32bit + __builtin_loongarch_cacop_w(1, v_ul[0], 1024); +// CHECK: error: '__builtin_loongarch_movfcsr2gr' needs target feature f + v_i[0] = __builtin_loongarch_movfcsr2gr(1); +// CHECK: error: '__builtin_loongarch_movgr2fcsr' needs target feature f + __builtin_loongarch_movgr2fcsr(1, v_i[1]); +// CHECK: error: '__builtin_loongarch_frecipe_s' needs target feature f,frecipe + float f1 = __builtin_loongarch_frecipe_s(a); +// CHECK: error: '__builtin_loongarch_frsqrte_s' needs target feature f,frecipe + float f2 = __builtin_loongarch_frsqrte_s(a); +// CHECK: error: '__builtin_loongarch_frecipe_d' needs target feature d,frecipe + double d1 = __builtin_loongarch_frecipe_d(b); +// CHECK: error: '__builtin_loongarch_frsqrte_d' needs target feature d,frecipe + double d2 = __builtin_loongarch_frsqrte_d(b); +} +#endif + void csrrd_d(int a) { __builtin_loongarch_csrrd_d(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} __builtin_loongarch_csrrd_d(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c new file mode 100644 index 0000000000000000000000000000000000000000..b79f939403993c87813aaa26d352356b9e9cfcf7 --- /dev/null +++ 
b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate-alias.c @@ -0,0 +1,37 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +#include + +// CHECK-LABEL: @xvfrecipe_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrecipe_s(v8f32 _1) { return __lasx_xvfrecipe_s(_1); } +// CHECK-LABEL: @xvfrecipe_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfrecipe_d(v4f64 _1) { return __lasx_xvfrecipe_d(_1); } +// CHECK-LABEL: @xvfrsqrte_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrsqrte_s(v8f32 _1) { return __lasx_xvfrsqrte_s(_1); } +// CHECK-LABEL: @xvfrsqrte_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: ret void +// +v4f64 xvfrsqrte_d(v4f64 _1) { return __lasx_xvfrsqrte_d(_1); } diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c new file mode 100644 index 0000000000000000000000000000000000000000..63e9ba639ea2c94bc591d0baa61fd939ce76c26d --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-approximate.c @@ -0,0 +1,38 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); + +// CHECK-LABEL: @xvfrecipe_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrecipe_s(v8f32 _1) { return __builtin_lasx_xvfrecipe_s(_1); } +// CHECK-LABEL: @xvfrecipe_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfrecipe_d(v4f64 _1) { return __builtin_lasx_xvfrecipe_d(_1); } +// CHECK-LABEL: @xvfrsqrte_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrsqrte_s(v8f32 _1) { return __builtin_lasx_xvfrsqrte_s(_1); } +// CHECK-LABEL: @xvfrsqrte_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfrsqrte_d(v4f64 _1) { return __builtin_lasx_xvfrsqrte_d(_1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c new file mode 100644 index 0000000000000000000000000000000000000000..f26f032c878e6dfbf2779aa50d666e1e5bb482f6 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate-alias.c @@ -0,0 +1,37 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +#include + +// CHECK-LABEL: @vfrecipe_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrecipe_s(v4f32 _1) { return __lsx_vfrecipe_s(_1); } +// CHECK-LABEL: @vfrecipe_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrecipe_d(v2f64 _1) { return __lsx_vfrecipe_d(_1); } +// CHECK-LABEL: 
@vfrsqrte_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrsqrte_s(v4f32 _1) { return __lsx_vfrsqrte_s(_1); } +// CHECK-LABEL: @vfrsqrte_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrsqrte_d(v2f64 _1) { return __lsx_vfrsqrte_d(_1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c new file mode 100644 index 0000000000000000000000000000000000000000..39fa1663db349b0cba038cc3b6d31ebe76608573 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-approximate.c @@ -0,0 +1,38 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -target-feature +frecipe -O2 -emit-llvm %s -o - | FileCheck %s + +typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); + +// CHECK-LABEL: @vfrecipe_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrecipe_s (v4f32 _1) { return __builtin_lsx_vfrecipe_s (_1); } +// CHECK-LABEL: @vfrecipe_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x 
double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrecipe_d (v2f64 _1) { return __builtin_lsx_vfrecipe_d (_1); } +// CHECK-LABEL: @vfrsqrte_s +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrsqrte_s (v4f32 _1) { return __builtin_lsx_vfrsqrte_s (_1); } +// CHECK-LABEL: @vfrsqrte_d +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrsqrte_d (v2f64 _1) { return __builtin_lsx_vfrsqrte_d (_1); } diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c index 3cdf3ba3d23e14407edae2dc21f35bd1599a4fb7..90634bbcf00358e86e5a93b23235ab0bf36724cd 100644 --- a/clang/test/Driver/loongarch-default-features.c +++ b/clang/test/Driver/loongarch-default-features.c @@ -2,7 +2,7 @@ // RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64 // LA32: "target-features"="+32bit" -// LA64: "target-features"="+64bit,+d,+f,+ual" +// LA64: "target-features"="+64bit,+d,+f,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c index 9214130cd034fd58164b4860c5199d198f21f577..2d5b315d962a1e9ca669bab92a6a0ef2c6f9726e 100644 --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -2,10 +2,22 @@ // RUN: 
FileCheck %s --check-prefix=CC1-LOONGARCH64 // RUN: %clang --target=loongarch64 -march=la464 -fsyntax-only %s -### 2>&1 | \ // RUN: FileCheck %s --check-prefix=CC1-LA464 +// RUN: %clang --target=loongarch64 -march=la64v1.0 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LA64V1P0 +// RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LA64V1P1 +// RUN: %clang --target=loongarch64 -march=la664 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LA664 // RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-LOONGARCH64 // RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-LA464 +// RUN: %clang --target=loongarch64 -march=la64v1.0 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LA64V1P0 +// RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LA64V1P1 +// RUN: %clang --target=loongarch64 -march=la664 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LA664 // CC1-LOONGARCH64: "-target-cpu" "loongarch64" // CC1-LOONGARCH64-NOT: "-target-feature" @@ -19,8 +31,29 @@ // CC1-LA464-NOT: "-target-feature" // CC1-LA464: "-target-abi" "lp64d" +// CC1-LA64V1P0: "-target-cpu" "loongarch64" +// CC1-LA64V1P0-NOT: "-target-feature" +// CC1-LA64V1P0: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" +// CC1-LA64V1P0-NOT: "-target-feature" +// CC1-LA64V1P0: "-target-abi" "lp64d" + +// CC1-LA64V1P1: "-target-cpu" "loongarch64" +// CC1-LA64V1P1-NOT: "-target-feature" +// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" +// CC1-LA64V1P1-NOT: "-target-feature" +// CC1-LA64V1P1: "-target-abi" "lp64d" + +// CC1-LA664: 
"-target-cpu" "la664" +// CC1-LA664-NOT: "-target-feature" +// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" +// CC1-LA664-NOT: "-target-feature" +// CC1-LA664: "-target-abi" "lp64d" + // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" +// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" +// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" +// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c index 0b934f125c9e462b5d7176367deb7ee90acd8b87..87634ff5a9a40b3063cf75473f2caa97f1f6bddb 100644 --- a/clang/test/Driver/loongarch-mlasx.c +++ b/clang/test/Driver/loongarch-mlasx.c @@ -5,7 +5,7 @@ // RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ // RUN: FileCheck %s --check-prefix=CC1-NOLASX // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ -// RUN: FileCheck %s --check-prefix=CC1-NOLASX +// RUN: FileCheck %s --check-prefix=CC1-LSX // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ // RUN: FileCheck %s --check-prefix=CC1-LASX // RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ @@ -18,7 +18,7 @@ // RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-NOLASX // RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ -// RUN: FileCheck %s 
--check-prefix=IR-NOLASX +// RUN: FileCheck %s --check-prefix=IR-LSX // RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-LASX // RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ @@ -26,9 +26,11 @@ // RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-LASX +// CC1-LSX: "-target-feature" "+lsx" // CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" // CC1-NOLASX: "-target-feature" "-lasx" +// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" // IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" // IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c new file mode 100644 index 0000000000000000000000000000000000000000..49d298e1b2e3f08990825f6e5f92556103293e5f --- /dev/null +++ b/clang/test/Driver/loongarch-msimd.c @@ -0,0 +1,95 @@ +/// Test -msimd options. 
+ +/// COM: -msimd=none +// RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=none 
-fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +/// COM: -msimd=lsx +// RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + +// RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +/// 
COM: -msimd=lasx +// RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +// NOLSX-NOT: "-target-feature" "+lsx" +// NOLASX-NOT: "-target-feature" "+lasx" +// LSX-DAG: "-target-feature" "+lsx" +// LASX-DAG: "-target-feature" "+lasx" +// NOLSX-NOT: "-target-feature" "+lsx" +// NOLASX-NOT: "-target-feature" "+lasx" diff --git a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c index bd9b3e8a8c019d02ae154f5791192061909bd389..4eb0865b53a59e04f21b53dccb71c96fb69a5840 100644 --- a/clang/test/Driver/loongarch-msingle-float.c +++ b/clang/test/Driver/loongarch-msingle-float.c @@ -11,10 +11,10 @@ // WARN: warning: ignoring '-mabi=lp64s' as it conflicts with that implied by '-msingle-float' (lp64f) // WARN: warning: ignoring 
'-mfpu=64' as it conflicts with that implied by '-msingle-float' (32) -// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" +// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx" // CC1: "-target-abi" "lp64f" -// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d" +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d,-lsx" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c index 0e5121ac84b4c1ca51eee930b8fea47a28f4ccdb..ebf27fb00e309ed136227eb9d2ffda31ddcbf487 100644 --- a/clang/test/Driver/loongarch-msoft-float.c +++ b/clang/test/Driver/loongarch-msoft-float.c @@ -11,10 +11,10 @@ // WARN: warning: ignoring '-mabi=lp64d' as it conflicts with that implied by '-msoft-float' (lp64s) // WARN: warning: ignoring '-mfpu=64' as it conflicts with that implied by '-msoft-float' (0) -// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" +// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" "-target-feature" "-lsx" // CC1: "-target-abi" "lp64s" -// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f,-lsx" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mtune.c b/clang/test/Driver/loongarch-mtune.c index 6f3f39e9bbd86a351d161bc2bc213783a612829f..face12e1a1a82a70dedc7540c720f79524121fdb 100644 --- a/clang/test/Driver/loongarch-mtune.c +++ b/clang/test/Driver/loongarch-mtune.c @@ -8,6 +8,11 @@ // RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464 +// RUN: %clang --target=loongarch64 -mtune=la664 -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=la664 +// RUN: %clang --target=loongarch64 -mtune=la664 -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IRATTR 
-DCPU=la664 + // RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \ // RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=invalidcpu // RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \ diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c index 63b4320361592ab7967aaf87b5582953ac60c7e1..4aada126cf06949a56918f83f17d2806e2596047 100644 --- a/clang/test/Driver/mcmodel.c +++ b/clang/test/Driver/mcmodel.c @@ -8,6 +8,14 @@ // RUN: not %clang -c -mcmodel=lager %s 2>&1 | FileCheck --check-prefix=INVALID %s // RUN: not %clang -c --target=aarch64 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=AARCH64-MEDIUM %s // RUN: not %clang -c --target=aarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=AARCH64-KERNEL %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=normal %s 2>&1 | FileCheck --check-prefix=SMALL %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=extreme %s 2>&1 | FileCheck --check-prefix=LARGE %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-TINY %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-SMALL %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-KERNEL %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-LARGE %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=extreme -fplt %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-PLT-EXTREME %s // TINY: "-mcmodel=tiny" // SMALL: "-mcmodel=small" @@ -20,3 +28,10 @@ // AARCH64-MEDIUM: error: invalid argument 'medium' to -mcmodel= // AARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= + +// ERR-LOONGARCH64-TINY: error: invalid argument 'tiny' to 
-mcmodel= +// ERR-LOONGARCH64-SMALL: error: invalid argument 'small' to -mcmodel= +// ERR-LOONGARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= +// ERR-LOONGARCH64-LARGE: error: invalid argument 'large' to -mcmodel= + +// ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt' diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index e235a728302153a62617eae9c1eb17e87347fe34..887b6d6af7e1bf50713be1f169499b837b88ad92 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -788,24 +788,51 @@ // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 -/// Check __loongarch_arch and __loongarch_tune. +/// Check __loongarch_arch{_tune/_frecipe}. // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ -// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=la464 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=loongarch64 | \ -// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la464 | \ -// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 
-DTUNE=la464 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la464 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la464 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la464 -mtune=loongarch64 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la464 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang -lsx | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ +// 
RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ +// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" +// FRECIPE: #define __loongarch_frecipe 1 // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ @@ -814,20 +841,24 @@ // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s // RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s // RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s // MLSX-NOT: #define __loongarch_asx +// MLSX: #define __loongarch_simd_width 128 // MLSX: #define __loongarch_sx 1 // RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ // RUN: 
| FileCheck --match-full-lines --check-prefix=MLASX %s -// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ -// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s // RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s // RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s // MLASX: #define __loongarch_asx 1 +// MLASX: #define __loongarch_simd_width 256 // MLASX: #define __loongarch_sx 1 // RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ @@ -838,7 +869,6 @@ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s -// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ -// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // MNO-LSX-NOT: #define __loongarch_asx +// MNO-LSX-NOT: #define __loongarch_simd_width // MNO-LSX-NOT: #define __loongarch_sx diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 160fab4aeba9a8da8ed95add75e401d49b701d2d..19147a0f6df69e35e61951d99855f08ba4186e6c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -85,90 +85,33 @@ static uint64_t getLoongArchPage(uint64_t p) { static uint32_t lo12(uint32_t val) { return val & 0xfff; } // Calculate the adjusted page delta between dest and PC. 
-uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { - // Consider the large code model access pattern, of which the smaller code - // models' access patterns are a subset: - // - // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] - // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] - // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] - // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] - // {ldx,stx,add}.* dest, U, T - // - // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, - // with RQ, P, ZY, X and A representing the respective bitfields as unsigned - // integers. We have: - // - // page(dest) = 0xZZZ'YYYYY'XXXXX'000 - // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 - // ---------------------------------- - // 0xddd'ccccc'bbbbb'000 - // - // Now consider the above pattern's actual effects: - // - // page(pc) 0xRRR'QQQQQ'PPPPP'000 - // pcalau12i + 0xiii'iiiii'bbbbb'000 - // addi + 0xjjj'jjjjj'kkkkk'AAA - // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 - // -------------------------------------------------- - // dest = U + T - // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) - // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A - // = (ZY<<32) + (X<<12) + A - // - // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k - // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k - // - // where i and k are terms representing the effect of b's and A's sign - // extension respectively. - // - // i = signed b < 0 ? -0x10000'0000 : 0 - // k = signed A < 0 ? -0x1000 : 0 - // - // The j term is a bit complex: it represents the higher half of - // sign-extended bits from A that are effectively lost if i == 0 but k != 0, - // due to overwriting by lu32i.d & lu52i.d. - // - // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 - // - // The actual effect of the instruction sequence before the final addition, - // i.e. 
our desired result value, is thus: - // - // result = (cd<<32) + (b<<12) - // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k - // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k - // = page(dest) - page(pc) - i - j - k - // - // when signed A >= 0 && signed b >= 0: - // - // i = j = k = 0 - // result = page(dest) - page(pc) - // - // when signed A >= 0 && signed b < 0: - // - // i = -0x10000'0000, j = k = 0 - // result = page(dest) - page(pc) + 0x10000'0000 - // - // when signed A < 0 && signed b >= 0: - // - // i = 0, j = 0x10000'0000, k = -0x1000 - // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 - // - // when signed A < 0 && signed b < 0: - // - // i = -0x10000'0000, j = 0, k = -0x1000 - // result = page(dest) - page(pc) + 0x1000 - uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); - bool negativeA = lo12(dest) > 0x7ff; - bool negativeB = (result & 0x8000'0000) != 0; - - if (negativeA) - result += 0x1000; - if (negativeA && !negativeB) - result -= 0x10000'0000; - else if (!negativeA && negativeB) - result += 0x10000'0000; - +uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { + // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d + // + lu52i.d`, they must be adjacent so that we can infer the PC of + // `pcalau12i` when calculating the page delta for the other two instructions + // (lu32i.d and lu52i.d). Compensating for all the sign-extensions is a bit + // complicated, so just use the psABI-recommended algorithm. 
+ uint64_t pcalau12i_pc; + switch (type) { + case R_LARCH_PCALA64_LO20: + case R_LARCH_GOT64_PC_LO20: + case R_LARCH_TLS_IE64_PC_LO20: + pcalau12i_pc = pc - 8; + break; + case R_LARCH_PCALA64_HI12: + case R_LARCH_GOT64_PC_HI12: + case R_LARCH_TLS_IE64_PC_HI12: + pcalau12i_pc = pc - 12; + break; + default: + pcalau12i_pc = pc; + break; + } + uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc); + if (dest & 0x800) + result += 0x1000 - 0x1'0000'0000; + if (result & 0x8000'0000) + result += 0x1'0000'0000; return result; } @@ -479,6 +422,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_B16: case R_LARCH_B21: case R_LARCH_B26: + case R_LARCH_CALL36: return R_PLT_PC; case R_LARCH_GOT_PC_HI20: case R_LARCH_GOT64_PC_LO20: @@ -607,6 +551,25 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, write32le(loc, setD10k16(read32le(loc), val >> 2)); return; + case R_LARCH_CALL36: { + // This relocation is designed for adjacent pcaddu18i+jirl pairs that + // are patched at one time. Because of sign extension of these insns' + // immediate fields, the relocation range is [-128G - 0x20000, +128G - + // 0x20000) (of course must be 4-byte aligned). + if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38)) + reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000, + llvm::maxIntN(38) - 0x20000); + checkAlignment(loc, val, 4, rel); + // Since jirl performs sign extension on the offset immediate, add (1<<17) + // to the original val to get the correct hi20. + uint32_t hi20 = extractBits(val + (1 << 17), 37, 18); + // Despite the name, the lower part is actually 18 bits, 4-byte aligned. + uint32_t lo16 = extractBits(val, 17, 2); + write32le(loc, setJ20(read32le(loc), hi20)); + write32le(loc + 4, setK16(read32le(loc + 4), lo16)); + return; + } + // Relocs intended for `addi`, `ld` or `st`. 
case R_LARCH_PCALA_LO12: // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index b178d82407e30703427804f9d6d14016540284f6..44444b62251da5426ec72ff26080628c16a42f00 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -712,8 +712,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return sym.getGotVA() + a - p; case R_LOONGARCH_GOT_PAGE_PC: if (sym.hasFlag(NEEDS_TLSGD)) - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); - return getLoongArchPageDelta(sym.getGotVA() + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); + return getLoongArchPageDelta(sym.getGotVA() + a, p, type); case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: @@ -763,7 +763,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return 0; } case R_LOONGARCH_PAGE_PC: - return getLoongArchPageDelta(sym.getVA(a), p); + return getLoongArchPageDelta(sym.getVA(a), p, type); case R_PC: case R_ARM_PCA: { uint64_t dest; @@ -798,7 +798,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; case R_LOONGARCH_PLT_PAGE_PC: - return getLoongArchPageDelta(sym.getPltVA() + a, p); + return getLoongArchPageDelta(sym.getPltVA() + a, p, type); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); case R_PPC32_PLTREL: @@ -860,7 +860,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; case R_LOONGARCH_TLSGD_PAGE_PC: - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: 
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index bf831afa179305e4f2b8a5d84b333871b80857d6..aeabe47f92a10b40538f06eeb9e66ff2ed3deff5 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -229,7 +229,7 @@ void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); template void writeARMCmseImportLib(); -uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); +uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type); void riscvFinalizeRelax(int passes); void mergeRISCVAttributesSections(); void addArmInputSectionMappingSymbols(); diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s new file mode 100644 index 0000000000000000000000000000000000000000..b593fdf1f6045e21cfa7d61f668dea2e06efbca9 --- /dev/null +++ b/lld/test/ELF/loongarch-call36.s @@ -0,0 +1,69 @@ +# REQUIRES: loongarch + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.o + +# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x60020 -o %t/exe1 +# RUN: llvm-objdump --no-show-raw-insn -d %t/exe1 | FileCheck --match-full-lines %s --check-prefix=EXE1 +## hi20 = target - pc + (1 << 17) >> 18 = 0x60020 - 0x20010 + 0x20000 >> 18 = 1 +## lo18 = target - pc & (1 << 18) - 1 = 0x60020 - 0x20010 & 0x3ffff = 16 +# EXE1: 20010: pcaddu18i $t0, 1 +# EXE1-NEXT: 20014: jirl $zero, $t0, 16 + +# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x40020 -o %t/exe2 +# RUN: llvm-objdump --no-show-raw-insn -d %t/exe2 | FileCheck --match-full-lines %s --check-prefix=EXE2 +## hi20 = target - pc + (1 << 17) >> 18 = 0x40020 - 0x20010 + 0x20000 >> 18 = 1 +## lo18 = target - pc & (1 << 18) - 1 = 0x40020 - 0x20010 & 0x3ffff = -131056 +# EXE2: 20010: pcaddu18i $t0, 1 +# EXE2-NEXT: 20014: jirl $zero, $t0, -131056 + +# RUN: ld.lld %t/a.o -shared -T %t/a.t -o %t/a.so +# RUN: llvm-readelf -x .got.plt %t/a.so | FileCheck 
--check-prefix=GOTPLT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t/a.so | FileCheck --check-prefix=SO %s +## PLT should be present in this case. +# SO: Disassembly of section .plt: +# SO: <.plt>: +## foo@plt: +# SO: 1234520: pcaddu12i $t3, 64{{$}} +# SO-NEXT: ld.d $t3, $t3, 544{{$}} +# SO-NEXT: jirl $t1, $t3, 0 +# SO-NEXT: nop + +# SO: Disassembly of section .text: +# SO: <_start>: +## hi20 = foo@plt - pc + (1 << 17) >> 18 = 0x1234520 - 0x1274670 + 0x20000 >> 18 = -1 +## lo18 = foo@plt - pc & (1 << 18) - 1 = 0x1234520 - 0x1274670 & 0x3ffff = -336 +# SO-NEXT: pcaddu18i $t0, -1{{$}} +# SO-NEXT: jirl $zero, $t0, -336{{$}} + +# GOTPLT: section '.got.plt': +# GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000 +# GOTPLT-NEXT: 0x01274740 00452301 00000000 + +# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x2000020000 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_CALL36 out of range: 137438953472 is not in [-137439084544, 137438822399]; references 'foo' + +## Impossible case in reality because all LoongArch instructions are a fixed 4 bytes long.
+# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x40001 -o /dev/null 2>&1 | \ +# RUN: FileCheck -DFILE=%t/a.o --check-prefix=ERROR-ALIGN %s +# ERROR-ALIGN: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_CALL36: 0x20001 is not aligned to 4 bytes + +#--- a.t +SECTIONS { + .plt 0x1234500: { *(.plt) } + .text 0x1274670: { *(.text) } +} + +#--- a.s +.text +.global _start +_start: + .reloc ., R_LARCH_CALL36, foo + pcaddu18i $t0, 0 + jirl $zero, $t0, 0 + +.section .sec.foo,"awx" +.global foo +foo: + ret diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s index 9df3492d18772ba59901e1351b9c669090a8581b..0405961e5f74ec236cf58b6acb2b5c66218173c7 100644 --- a/lld/test/ELF/loongarch-pc-aligned.s +++ b/lld/test/ELF/loongarch-pc-aligned.s @@ -75,8 +75,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0 -# EXTREME0: addi.d $t0, $zero, 273 -# EXTREME0-NEXT: pcalau12i $t1, 139810 +# EXTREME0: pcalau12i $t1, 139810 +# EXTREME0-NEXT: addi.d $t0, $zero, 273 # EXTREME0-NEXT: lu32i.d $t0, 209715 # EXTREME0-NEXT: lu52i.d $t0, $t0, 1092 @@ -87,8 +87,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1 -# EXTREME1: addi.d $t0, $zero, -1912 -# EXTREME1-NEXT: pcalau12i $t1, 139811 +# EXTREME1: pcalau12i $t1, 139811 +# EXTREME1-NEXT: addi.d $t0, $zero, -1912 # EXTREME1-NEXT: lu32i.d $t0, 209714 # EXTREME1-NEXT: lu52i.d $t0, $t0, 1092 @@ -99,8 +99,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme2 # RUN: 
llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 -# EXTREME2: addi.d $t0, $zero, 273 -# EXTREME2-NEXT: pcalau12i $t1, -419431 +# EXTREME2: pcalau12i $t1, -419431 +# EXTREME2-NEXT: addi.d $t0, $zero, 273 # EXTREME2-NEXT: lu32i.d $t0, 209716 # EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 @@ -111,8 +111,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 -# EXTREME3: addi.d $t0, $zero, -1912 -# EXTREME3-NEXT: pcalau12i $t1, -419430 +# EXTREME3: pcalau12i $t1, -419430 +# EXTREME3-NEXT: addi.d $t0, $zero, -1912 # EXTREME3-NEXT: lu32i.d $t0, 209715 # EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 @@ -123,8 +123,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme4 | FileCheck %s --check-prefix=EXTREME4 -# EXTREME4: addi.d $t0, $zero, 273 -# EXTREME4-NEXT: pcalau12i $t1, 139810 +# EXTREME4: pcalau12i $t1, 139810 +# EXTREME4-NEXT: addi.d $t0, $zero, 273 # EXTREME4-NEXT: lu32i.d $t0, -349526 # EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 @@ -135,8 +135,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 -# EXTREME5: addi.d $t0, $zero, -1912 -# EXTREME5-NEXT: pcalau12i $t1, 139811 +# EXTREME5: pcalau12i $t1, 139811 +# EXTREME5-NEXT: addi.d $t0, $zero, -1912 # EXTREME5-NEXT: lu32i.d $t0, -349527 # EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 @@ -147,8 +147,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 # 
RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 -# EXTREME6: addi.d $t0, $zero, 273 -# EXTREME6-NEXT: pcalau12i $t1, -419431 +# EXTREME6: pcalau12i $t1, -419431 +# EXTREME6-NEXT: addi.d $t0, $zero, 273 # EXTREME6-NEXT: lu32i.d $t0, -349525 # EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 @@ -159,8 +159,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 -# EXTREME7: addi.d $t0, $zero, -1912 -# EXTREME7-NEXT: pcalau12i $t1, -419430 +# EXTREME7: pcalau12i $t1, -419430 +# EXTREME7-NEXT: addi.d $t0, $zero, -1912 # EXTREME7-NEXT: lu32i.d $t0, -349526 # EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 @@ -171,8 +171,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 -# EXTREME8: addi.d $t0, $zero, 273 -# EXTREME8-NEXT: pcalau12i $t1, 139810 +# EXTREME8: pcalau12i $t1, 139810 +# EXTREME8-NEXT: addi.d $t0, $zero, 273 # EXTREME8-NEXT: lu32i.d $t0, 209715 # EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 @@ -183,8 +183,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme9 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 -# EXTREME9: addi.d $t0, $zero, -1912 -# EXTREME9-NEXT: pcalau12i $t1, 139811 +# EXTREME9: pcalau12i $t1, 139811 +# EXTREME9-NEXT: addi.d $t0, $zero, -1912 # EXTREME9-NEXT: lu32i.d $t0, 209714 # EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 @@ -195,8 +195,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o 
%t/extreme10 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 -# EXTREME10: addi.d $t0, $zero, 273 -# EXTREME10-NEXT: pcalau12i $t1, -419431 +# EXTREME10: pcalau12i $t1, -419431 +# EXTREME10-NEXT: addi.d $t0, $zero, 273 # EXTREME10-NEXT: lu32i.d $t0, 209716 # EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 @@ -207,8 +207,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme11 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 -# EXTREME11: addi.d $t0, $zero, -1912 -# EXTREME11-NEXT: pcalau12i $t1, -419430 +# EXTREME11: pcalau12i $t1, -419430 +# EXTREME11-NEXT: addi.d $t0, $zero, -1912 # EXTREME11-NEXT: lu32i.d $t0, 209715 # EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 @@ -219,8 +219,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 -# EXTREME12: addi.d $t0, $zero, 273 -# EXTREME12-NEXT: pcalau12i $t1, 139810 +# EXTREME12: pcalau12i $t1, 139810 +# EXTREME12-NEXT: addi.d $t0, $zero, 273 # EXTREME12-NEXT: lu32i.d $t0, -349526 # EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 @@ -231,8 +231,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 -# EXTREME13: addi.d $t0, $zero, -1912 -# EXTREME13-NEXT: pcalau12i $t1, 139811 +# EXTREME13: pcalau12i $t1, 139811 +# EXTREME13-NEXT: addi.d $t0, $zero, -1912 # EXTREME13-NEXT: lu32i.d $t0, -349527 # EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 @@ -243,8 +243,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o 
--section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 -# EXTREME14: addi.d $t0, $zero, 273 -# EXTREME14-NEXT: pcalau12i $t1, -419431 +# EXTREME14: pcalau12i $t1, -419431 +# EXTREME14-NEXT: addi.d $t0, $zero, 273 # EXTREME14-NEXT: lu32i.d $t0, -349525 # EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 @@ -255,11 +255,48 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 -# EXTREME15: addi.d $t0, $zero, -1912 -# EXTREME15-NEXT: pcalau12i $t1, -419430 +# EXTREME15: pcalau12i $t1, -419430 +# EXTREME15-NEXT: addi.d $t0, $zero, -1912 # EXTREME15-NEXT: lu32i.d $t0, -349526 # EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 +## page delta = 0xffffffff00000000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x00000 = 0 +## %pc64_lo20 = 0xfffff = -1 +## %pc64_hi12 = 0xfff = -1 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x0000000012344888 --section-start=.text=0x0000000012345678 -o %t/extreme16 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme16 | FileCheck %s --check-prefix=EXTREME16 +# EXTREME16: pcalau12i $t1, 0 +# EXTREME16-NEXT: addi.d $t0, $zero, -1912 +# EXTREME16-NEXT: lu32i.d $t0, -1 +# EXTREME16-NEXT: lu52i.d $t0, $t0, -1 + +## page delta = 0x0000000080000000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x80000 = -524288 +## %pc64_lo20 = 0x00000 = 0 +## %pc64_hi12 = 0x000 = 0 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x000071238ffff888 --section-start=.text=0x0000712310000678 -o %t/extreme17 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme17 | FileCheck %s --check-prefix=EXTREME17 +# EXTREME17: pcalau12i $t1, -524288 +# EXTREME17-NEXT: addi.d $t0, $zero, -1912 +# EXTREME17-NEXT:
lu32i.d $t0, 0 +# EXTREME17-NEXT: lu52i.d $t0, $t0, 0 + +## A case where pcalau12i, lu32i.d and lu52i.d are in different pages. +## page delta = 0x0000000180000000, page offset = 0x111 +## %pc_lo12 = 0x111 = 273 +## %pc_hi20 = 0x80000 = -524288 +## %pc64_lo20 = 0x00001 = 1 +## %pc64_hi12 = 0x000 = 0 +# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x80000111 --section-start=.text=0xff8 -o %t/extreme18 +# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme18 | FileCheck %s --check-prefix=EXTREME18 +# EXTREME18: pcalau12i $t1, -524288 +# EXTREME18-NEXT: addi.d $t0, $zero, 273 +# EXTREME18-NEXT: lu32i.d $t0, 1 +# EXTREME18-NEXT: lu52i.d $t0, $t0, 0 + #--- a.s .rodata x: @@ -277,7 +314,7 @@ x: .text .global _start _start: - addi.d $t0, $zero, %pc_lo12(x) pcalau12i $t1, %pc_hi20(x) + addi.d $t0, $zero, %pc_lo12(x) lu32i.d $t0, %pc64_lo20(x) lu52i.d $t0, $t0, %pc64_hi12(x) diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def index 02bce3c71712743cb2951b0df9a6c304dcd54c32..c4393432677b8e3e818460a4e2f42ec4abc55b4d 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -118,3 +118,9 @@ ELF_RELOC(R_LARCH_SUB6, 106) ELF_RELOC(R_LARCH_ADD_ULEB128, 107) ELF_RELOC(R_LARCH_SUB_ULEB128, 108) ELF_RELOC(R_LARCH_64_PCREL, 109) + +// Relocs added in ELF for the LoongArch™ Architecture v20231102, part of the +// v2.20 LoongArch ABI specs.
+// +// Spec addition: https://github.com/loongson/la-abi-specs/pull/4 +ELF_RELOC(R_LARCH_CALL36, 110) diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td index 685deaec7709bd7896a65fedcd121b6a3d410a45..9002076e7aecea6260eb9c5161cbfb0cb2870563 100644 --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -122,6 +122,15 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [ImmArg>]>; def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], [ImmArg>]>; + +def int_loongarch_frecipe_s : BaseInt<[llvm_float_ty], [llvm_float_ty], + [IntrNoMem]>; +def int_loongarch_frecipe_d : BaseInt<[llvm_double_ty], [llvm_double_ty], + [IntrNoMem]>; +def int_loongarch_frsqrte_s : BaseInt<[llvm_float_ty], [llvm_float_ty], + [IntrNoMem]>; +def int_loongarch_frsqrte_d : BaseInt<[llvm_double_ty], [llvm_double_ty], + [IntrNoMem]>; } // TargetPrefix = "loongarch" /// Vector intrinsic @@ -527,10 +536,12 @@ foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in [IntrNoMem]>; foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", + "vfrecipe_s", "vfrsqrte_s", "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", + "vfrecipe_d", "vfrsqrte_d", "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; @@ -1044,10 +1055,12 @@ foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in [IntrNoMem]>; foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", + "xvfrecipe_s", "xvfrsqrte_s", "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; 
foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", + "xvfrecipe_d", "xvfrsqrte_d", "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def index b20d124953f882e49eeb3082f629160028a29397..101a48cbd5399474b1c231cc5f20cf5187ea1083 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -10,6 +10,7 @@ LOONGARCH_FEATURE("+lasx", FK_LASX) LOONGARCH_FEATURE("+lbt", FK_LBT) LOONGARCH_FEATURE("+lvz", FK_LVZ) LOONGARCH_FEATURE("+ual", FK_UAL) +LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) #undef LOONGARCH_FEATURE @@ -19,5 +20,6 @@ LOONGARCH_FEATURE("+ual", FK_UAL) LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) +LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE) #undef LOONGARCH_ARCH diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index 028844187584b236644a3283fa05a5b2602e828d..c0bb15a5163b128b259d46b96ce569a7b48d8aa2 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -46,6 +46,9 @@ enum FeatureKind : uint32_t { // Allow memory accesses to be unaligned. FK_UAL = 1 << 8, + + // Floating-point approximate reciprocal instructions are available. 
+ FK_FRECIPE = 1 << 9, }; struct FeatureInfo { diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index a132e645c8644f6a4cec2b8caff5fa4e801134df..f908e5bc63d3107ed775e8a30b69bcb08594b841 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -122,6 +122,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { // Helper to emit pseudo instruction "li.w/d $rd, $imm". void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". + void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + bool IsTailCall); + public: enum LoongArchMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, @@ -401,6 +405,22 @@ public: IsValidKind; } + bool isSImm20pcaddu18i() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_CALL36; + + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isSImm21lsl2() const { if (!isImm()) return false; @@ -1111,6 +1131,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, } } +void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, bool IsTailCall) { + // call36 sym + // expands to: + // pcaddu18i $ra, %call36(sym) + // jirl $ra, $ra, 0 + // + // tail36 $rj, sym + // expands to: + // pcaddu18i $rj, %call36(sym) + // jirl $r0, $rj, 0 + unsigned ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + const MCExpr *Sym = + IsTailCall ? 
Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); + const LoongArchMCExpr *LE = LoongArchMCExpr::create( + Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); + + Out.emitInstruction( + MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), + getSTI()); + Out.emitInstruction( + MCInstBuilder(LoongArch::JIRL) + .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) + .addReg(ScratchReg) + .addImm(0), + getSTI()); +} + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out) { @@ -1159,6 +1208,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case LoongArch::PseudoLI_D: emitLoadImm(Inst, IDLoc, Out); return false; + case LoongArch::PseudoCALL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); + return false; + case LoongArch::PseudoTAIL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); + return false; } Out.emitInstruction(Inst, getSTI()); return false; @@ -1440,6 +1495,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /*Upper=*/(1 << 19) - 1, "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " "in the range"); + case Match_InvalidSImm20pcaddu18i: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. 
%call36) or an integer " + "in the range"); case Match_InvalidSImm21lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 2a4c991a43b09cfb52844f58e6dcb760e2b2abae..5f85cace71af7f4bd1a1dafb146d7148642f67ee 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -105,10 +105,13 @@ def FeatureUAL def FeatureRelax : SubtargetFeature<"relax", "HasLinkerRelax", "true", "Enable Linker relaxation">; -// Experimental auto vectorization -def FeatureAutoVec - : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", - "Experimental auto vectorization">; + +// Floating point approximation operation +def FeatureFrecipe + : SubtargetFeature<"frecipe", "HasFrecipe", "true", + "Support frecipe.{s/d} and frsqrte.{s/d} instructions.">; +def HasFrecipe : Predicate<"Subtarget->hasFrecipe()">; + //===----------------------------------------------------------------------===// // Registers, instruction descriptions ... @@ -140,6 +143,13 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, FeatureExtLVZ, FeatureExtLBT]>; +def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, + FeatureUAL, + FeatureExtLASX, + FeatureExtLVZ, + FeatureExtLBT, + FeatureFrecipe]>; + //===----------------------------------------------------------------------===// // Define the LoongArch target. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index 72c1f1cec198347f4cc0ec710fbb380cf042bf40..ad39658f698e7b8ede8ed3297eec204c8c9f9b09 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -62,43 +62,24 @@ private: MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, unsigned SecondOpcode, unsigned FlagsLo); - bool expandLargeAddressLoad(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - unsigned LastOpcode, unsigned IdentifyingMO); - bool expandLargeAddressLoad(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - unsigned LastOpcode, unsigned IdentifyingMO, - const MachineOperand &Symbol, Register DestReg, - bool EraseFromParent); bool expandLoadAddressPcrel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressGot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator 
&NextMBBI, - bool Large = false); - bool expandFunctionCALL(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool IsTailCall); + MachineBasicBlock::iterator &NextMBBI); }; char LoongArchPreRAExpandPseudo::ID = 0; @@ -131,30 +112,16 @@ bool LoongArchPreRAExpandPseudo::expandMI( switch (MBBI->getOpcode()) { case LoongArch::PseudoLA_PCREL: return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_PCREL_LARGE: - return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_GOT: return expandLoadAddressGot(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_GOT_LARGE: - return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LE: return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); case LoongArch::PseudoLA_TLS_IE: return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_IE_LARGE: - return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LD: return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_LD_LARGE: - return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_GD: return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_GD_LARGE: - return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoCALL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); - case LoongArch::PseudoTAIL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); } return false; } @@ -187,118 +154,9 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( return true; } -bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, - unsigned IdentifyingMO) { - MachineInstr &MI = *MBBI; - return 
expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, - MI.getOperand(2), MI.getOperand(0).getReg(), - true); -} - -bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, - unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, - bool EraseFromParent) { - // Code Sequence: - // - // Part1: pcalau12i $scratch, %MO1(sym) - // Part0: addi.d $dest, $zero, %MO0(sym) - // Part2: lu32i.d $dest, %MO2(sym) - // Part3: lu52i.d $dest, $dest, %MO3(sym) - // Fin: LastOpcode $dest, $dest, $scratch - - unsigned MO0, MO1, MO2, MO3; - switch (IdentifyingMO) { - default: - llvm_unreachable("unsupported identifying MO"); - case LoongArchII::MO_PCREL_LO: - MO0 = IdentifyingMO; - MO1 = LoongArchII::MO_PCREL_HI; - MO2 = LoongArchII::MO_PCREL64_LO; - MO3 = LoongArchII::MO_PCREL64_HI; - break; - case LoongArchII::MO_GOT_PC_HI: - case LoongArchII::MO_LD_PC_HI: - case LoongArchII::MO_GD_PC_HI: - // These cases relocate just like the GOT case, except for Part1. - MO0 = LoongArchII::MO_GOT_PC_LO; - MO1 = IdentifyingMO; - MO2 = LoongArchII::MO_GOT_PC64_LO; - MO3 = LoongArchII::MO_GOT_PC64_HI; - break; - case LoongArchII::MO_IE_PC_LO: - MO0 = IdentifyingMO; - MO1 = LoongArchII::MO_IE_PC_HI; - MO2 = LoongArchII::MO_IE_PC64_LO; - MO3 = LoongArchII::MO_IE_PC64_HI; - break; - } - - MachineFunction *MF = MBB.getParent(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - - assert(MF->getSubtarget().is64Bit() && - "Large code model requires LA64"); - - Register TmpPart1 = - MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); - Register TmpPart0 = - DestReg.isVirtual() - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - Register TmpParts02 = - DestReg.isVirtual() - ? 
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - Register TmpParts023 = - DestReg.isVirtual() - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - - auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1); - auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0) - .addReg(LoongArch::R0); - auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02) - // "rj" is needed due to InstrInfo pattern requirement. - .addReg(TmpPart0, RegState::Kill); - auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023) - .addReg(TmpParts02, RegState::Kill); - BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) - .addReg(TmpParts023) - .addReg(TmpPart1, RegState::Kill); - - if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { - const char *SymName = Symbol.getSymbolName(); - Part0.addExternalSymbol(SymName, MO0); - Part1.addExternalSymbol(SymName, MO1); - Part2.addExternalSymbol(SymName, MO2); - Part3.addExternalSymbol(SymName, MO3); - } else { - Part0.addDisp(Symbol, 0, MO0); - Part1.addDisp(Symbol, 0, MO1); - Part2.addDisp(Symbol, 0, MO2); - Part3.addDisp(Symbol, 0, MO3); - } - - if (EraseFromParent) - MI.eraseFromParent(); - - return true; -} - bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%pc` family of - // relocs. 
- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_PCREL_LO); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %pc_hi20(sym) // addi.w/d $rd, $rd, %pc_lo12(sym) @@ -311,13 +169,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, loading the result from GOT with `ldx.d` in the end. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, - LoongArchII::MO_GOT_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %got_pc_hi20(sym) // ld.w/d $rd, $rd, %got_pc_lo12(sym) @@ -378,13 +230,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%ie_pc` family - // of relocs, loading the result with `ldx.d` in the end. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, - LoongArchII::MO_IE_PC_LO); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %ie_pc_hi20(sym) // ld.w/d $rd, $rd, %ie_pc_lo12(sym) @@ -397,13 +243,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. 
- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_LD_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %ld_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -416,13 +256,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_GD_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %gd_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -433,88 +267,6 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( SecondOpcode, LoongArchII::MO_GOT_PC_LO); } -bool LoongArchPreRAExpandPseudo::expandFunctionCALL( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { - MachineFunction *MF = MBB.getParent(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - const MachineOperand &Func = MI.getOperand(0); - MachineInstrBuilder CALL; - unsigned Opcode; - - switch (MF->getTarget().getCodeModel()) { - default: - report_fatal_error("Unsupported code model"); - break; - case CodeModel::Small: { - // CALL: - // bl func - // TAIL: - // b func - Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); - break; - } - case CodeModel::Medium: { - // CALL: - // pcalau12i $ra, %pc_hi20(func) - // jirl $ra, $ra, %pc_lo12(func) - // TAIL: - // pcalau12i $scratch, %pc_hi20(func) - // jirl $r0, $scratch, %pc_lo12(func) - Opcode = - IsTailCall ? 
LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register ScratchReg = - IsTailCall - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); - if (Func.isSymbol()) { - const char *FnName = Func.getSymbolName(); - MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); - CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); - break; - } - assert(Func.isGlobal() && "Expected a GlobalValue at this time"); - const GlobalValue *GV = Func.getGlobal(); - MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); - CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); - break; - } - case CodeModel::Large: { - // Emit the 5-insn large address load sequence, either directly or - // indirectly in case of going through the GOT, then JIRL_TAIL or - // JIRL_CALL to $addr. - Opcode = - IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register AddrReg = - IsTailCall - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; - - bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); - unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; - unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; - expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, - false); - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); - break; - } - } - - // Transfer implicit operands. - CALL.copyImplicitOps(MI); - - // Transfer MI flags. 
- CALL.setMIFlags(MI.getFlags()); - - MI.eraseFromParent(); - return true; -} - class LoongArchExpandPseudo : public MachineFunctionPass { public: const LoongArchInstrInfo *TII; @@ -536,6 +288,35 @@ private: MachineBasicBlock::iterator &NextMBBI); bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO, + const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent); + bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressGotLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandFunctionCALL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + bool IsTailCall); }; char LoongArchExpandPseudo::ID = 0; @@ -570,6 +351,24 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, switch (MBBI->getOpcode()) { case LoongArch::PseudoCopyCFR: return expandCopyCFR(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_PCREL_LARGE: + return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_GOT_LARGE: + return 
expandLoadAddressGotLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE_LARGE: + return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LD_LARGE: + return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_GD_LARGE: + return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoCALL: + case LoongArch::PseudoCALL_MEDIUM: + case LoongArch::PseudoCALL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); + case LoongArch::PseudoTAIL: + case LoongArch::PseudoTAIL_MEDIUM: + case LoongArch::PseudoTAIL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); } return false; @@ -628,6 +427,212 @@ bool LoongArchExpandPseudo::expandCopyCFR( return true; } +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO) { + MachineInstr &MI = *MBBI; + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, + MI.getOperand(2), MI.getOperand(0).getReg(), + true); +} + +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent) { + // Code Sequence: + // + // Part1: pcalau12i $dst, %MO1(sym) + // Part0: addi.d $t8, $zero, %MO0(sym) + // Part2: lu32i.d $t8, %MO2(sym) + // Part3: lu52i.d $t8, $t8, %MO3(sym) + // Fin: LastOpcode $dst, $t8, $dst + + unsigned MO0, MO1, MO2, MO3; + switch (IdentifyingMO) { + default: + llvm_unreachable("unsupported identifying MO"); + case LoongArchII::MO_PCREL_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_PCREL_HI; + MO2 = LoongArchII::MO_PCREL64_LO; + MO3 = LoongArchII::MO_PCREL64_HI; + break; + case LoongArchII::MO_GOT_PC_HI: + case 
LoongArchII::MO_LD_PC_HI: + case LoongArchII::MO_GD_PC_HI: + // These cases relocate just like the GOT case, except for Part1. + MO0 = LoongArchII::MO_GOT_PC_LO; + MO1 = IdentifyingMO; + MO2 = LoongArchII::MO_GOT_PC64_LO; + MO3 = LoongArchII::MO_GOT_PC64_HI; + break; + case LoongArchII::MO_IE_PC_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_IE_PC_HI; + MO2 = LoongArchII::MO_IE_PC64_LO; + MO3 = LoongArchII::MO_IE_PC64_HI; + break; + } + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register ScratchReg = LoongArch::R20; // $t8 + + assert(MBB.getParent()->getSubtarget().is64Bit() && + "Large code model requires LA64"); + + auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); + auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg) + .addReg(LoongArch::R0); + auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg) + // "rj" is needed due to InstrInfo pattern requirement. + .addReg(ScratchReg); + auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg) + .addReg(ScratchReg); + BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) + .addReg(ScratchReg) + .addReg(DestReg); + + if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { + const char *SymName = Symbol.getSymbolName(); + Part0.addExternalSymbol(SymName, MO0); + Part1.addExternalSymbol(SymName, MO1); + Part2.addExternalSymbol(SymName, MO2); + Part3.addExternalSymbol(SymName, MO3); + } else { + Part0.addDisp(Symbol, 0, MO0); + Part1.addDisp(Symbol, 0, MO1); + Part2.addDisp(Symbol, 0, MO2); + Part3.addDisp(Symbol, 0, MO3); + } + + if (EraseFromParent) + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%pc` family of + // relocs. 
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_PCREL_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressGotLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, loading the result from GOT with `ldx.d` in the end. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_GOT_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%ie_pc` family + // of relocs, loading the result with `ldx.d` in the end. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_IE_PC_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_LD_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. 
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_GD_PC_HI); +} + +bool LoongArchExpandPseudo::expandFunctionCALL( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + const MachineOperand &Func = MI.getOperand(0); + MachineInstrBuilder CALL; + unsigned Opcode; + + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + break; + case CodeModel::Small: { + // CALL: + // bl func + // TAIL: + // b func + Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); + break; + } + case CodeModel::Medium: { + // CALL: + // pcaddu18i $ra, %call36(func) + // jirl $ra, $ra, 0 + // TAIL: + // pcaddu18i $t8, %call36(func) + // jr $t8 + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register ScratchReg = IsTailCall ? LoongArch::R20 : LoongArch::R1; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); + + CALL = + BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); + + if (Func.isSymbol()) + MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); + else + MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); + break; + } + case CodeModel::Large: { + // Emit the 5-insn large address load sequence, either directly or + // indirectly in case of going through the GOT, then JIRL_TAIL or + // JIRL_CALL to $addr. + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; + + bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; + unsigned LAOpcode = UseGOT ? 
LoongArch::LDX_D : LoongArch::ADD_D; + expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, + false); + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); + break; + } + } + + // Transfer implicit operands. + CALL.copyImplicitOps(MI); + + // Transfer MI flags. + CALL.setMIFlags(MI.getFlags()); + + MI.eraseFromParent(); + return true; +} + } // end namespace INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 65120c083f498dc85ada7fef08e66a199200838a..e27896768818cb6d7a6eedc978db92c46c5e4bf6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -50,6 +50,8 @@ def FNEG_S : FP_ALU_2R<0x01141400>; def FSQRT_S : FP_ALU_2R<0x01144400>; def FRECIP_S : FP_ALU_2R<0x01145400>; def FRSQRT_S : FP_ALU_2R<0x01146400>; +def FRECIPE_S : FP_ALU_2R<0x01147400>; +def FRSQRTE_S : FP_ALU_2R<0x01148400>; def FSCALEB_S : FP_ALU_3R<0x01108000>; def FLOGB_S : FP_ALU_2R<0x01142400>; def FCOPYSIGN_S : FP_ALU_3R<0x01128000>; @@ -279,6 +281,12 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; // FP reciprocal operation def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>; +let Predicates = [HasFrecipe] in { +// FP approximate reciprocal operation +def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>; +def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>; +} + // fmadd.s: fj * fk + fa def : Pat<(fma FPR32:$fj, FPR32:$fk, FPR32:$fa), (FMADD_S $fj, $fk, $fa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 437c1e4d7be272525a47db4dcb9b3a560f5fb0cc..26bed67ac22215d2fce4cfde2115bf9018eb5f39 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ 
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -34,6 +34,8 @@ def FNEG_D : FP_ALU_2R<0x01141800, FPR64>; def FSQRT_D : FP_ALU_2R<0x01144800, FPR64>; def FRECIP_D : FP_ALU_2R<0x01145800, FPR64>; def FRSQRT_D : FP_ALU_2R<0x01146800, FPR64>; +def FRECIPE_D : FP_ALU_2R<0x01147800, FPR64>; +def FRSQRTE_D : FP_ALU_2R<0x01148800, FPR64>; def FSCALEB_D : FP_ALU_3R<0x01110000, FPR64>; def FLOGB_D : FP_ALU_2R<0x01142800, FPR64>; def FCOPYSIGN_D : FP_ALU_3R<0x01130000, FPR64>; @@ -240,6 +242,12 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; // FP reciprocal operation def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>; +let Predicates = [HasFrecipe] in { +// FP approximate reciprocal operation +def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>; +def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>; +} + // fmadd.d: fj * fk + fa def : Pat<(fma FPR64:$fj, FPR64:$fk, FPR64:$fa), (FMADD_D $fj, $fk, $fa)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4fc2b4709840bfb3ab9cfa1015d80597e6c0d7c5..618ae7056425451e5a3927b70ea2cbc200a988c1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -247,9 +247,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, Legal); @@ -293,9 +293,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); + 
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, Legal); @@ -422,9 +422,926 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return SDValue(); } +/// Determine whether a range fits a regular pattern of values. +/// This function accounts for the possibility of jumping over the End iterator. +template +static bool +fitsRegularPattern(typename SmallVectorImpl::const_iterator Begin, + unsigned CheckStride, + typename SmallVectorImpl::const_iterator End, + ValType ExpectedIndex, unsigned ExpectedIndexStride) { + auto &I = Begin; + + while (I != End) { + if (*I != -1 && *I != ExpectedIndex) + return false; + ExpectedIndex += ExpectedIndexStride; + + // Incrementing past End is undefined behaviour so we must increment one + // step at a time and check for End at each step. + for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) + ; // Empty loop body. + } + return true; +} + +/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). +/// +/// VREPLVEI performs vector broadcast based on an element specified by an +/// integer immediate, with its mask being similar to: +/// +/// where x is any valid index. +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above form. 
+static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + int SplatIndex = -1; + for (const auto &M : Mask) { + if (M != -1) { + SplatIndex = M; + break; + } + } + + if (SplatIndex == -1) + return DAG.getUNDEF(VT); + + assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index"); + if (fitsRegularPattern(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) { + APInt Imm(64, SplatIndex); + return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1, + DAG.getConstant(Imm, DL, MVT::i64)); + } + + return SDValue(); +} + +/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). +/// +/// VSHUF4I splits the vector into blocks of four elements, then shuffles these +/// elements according to a <4 x i2> constant (encoded as an integer immediate). +/// +/// It is therefore possible to lower into VSHUF4I when the mask takes the form: +/// +/// When undef's appear they are treated as if they were whatever value is +/// necessary in order to fit the above forms. +/// +/// For example: +/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, +/// <8 x i32> +/// is lowered to: +/// (VSHUF4I_H $v0, $v1, 27) +/// where the 27 comes from: +/// 3 + (2 << 2) + (1 << 4) + (0 << 6) +static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + // When the size is less than 4, lower cost instructions may be used. + if (Mask.size() < 4) + return SDValue(); + + int SubMask[4] = {-1, -1, -1, -1}; + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = i; j < Mask.size(); j += 4) { + int Idx = Mask[j]; + + // Convert from vector index to 4-element subvector index + // If an index refers to an element outside of the subvector then give up + if (Idx != -1) { + Idx -= 4 * (j / 4); + if (Idx < 0 || Idx >= 4) + return SDValue(); + } + + // If the mask has an undef, replace it with the current index. 
+ // Note that it might still be undef if the current index is also undef + if (SubMask[i] == -1) + SubMask[i] = Idx; + // Check that non-undef values are the same as in the mask. If they + // aren't then give up + else if (Idx != -1 && Idx != SubMask[i]) + return SDValue(); + } + } + + // Calculate the immediate. Replace any remaining undefs with zero + APInt Imm(64, 0); + for (int i = 3; i >= 0; --i) { + int Idx = SubMask[i]; + + if (Idx == -1) + Idx = 0; + + Imm <<= 2; + Imm |= Idx & 0x3; + } + + return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, + DAG.getConstant(Imm, DL, MVT::i64)); +} + +/// Lower VECTOR_SHUFFLE into VPACKEV (if possible). +/// +/// VPACKEV interleaves the even elements from each vector. +/// +/// It is possible to lower into VPACKEV when the mask consists of two of the +/// following forms interleaved: +/// <0, 2, 4, ...> +/// +/// where n is the number of elements in the vector. +/// For example: +/// <0, 0, 2, 2, 4, 4, ...> +/// <0, n, 2, n+2, 4, n+4, ...> +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &End = Mask.end(); + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 2, End, 0, 2)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 2, End, Mask.size(), 2)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Begin + 1, 2, End, 0, 2)) + V2 = OriV1; + else if (fitsRegularPattern(Begin + 1, 2, End, Mask.size(), 2)) + V2 = OriV2; + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VPACKOD (if possible). +/// +/// VPACKOD interleaves the odd elements from each vector. 
+/// +/// It is possible to lower into VPACKOD when the mask consists of two of the +/// following forms interleaved: +/// <1, 3, 5, ...> +/// +/// where n is the number of elements in the vector. +/// For example: +/// <1, 1, 3, 3, 5, 5, ...> +/// <1, n+1, 3, n+3, 5, n+5, ...> +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &End = Mask.end(); + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 2, End, 1, 2)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 2, End, Mask.size() + 1, 2)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Begin + 1, 2, End, 1, 2)) + V2 = OriV1; + else if (fitsRegularPattern(Begin + 1, 2, End, Mask.size() + 1, 2)) + V2 = OriV2; + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VILVH (if possible). +/// +/// VILVH interleaves consecutive elements from the left (highest-indexed) half +/// of each vector. +/// +/// It is possible to lower into VILVH when the mask consists of two of the +/// following forms interleaved: +/// +/// +/// where n is the number of elements in the vector and x is half n. +/// For example: +/// +/// +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. 
+static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &End = Mask.end(); + unsigned HalfSize = Mask.size() / 2; + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 2, End, HalfSize, 1)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 2, End, Mask.size() + HalfSize, 1)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1)) + V2 = OriV1; + else if (fitsRegularPattern(Begin + 1, 2, End, Mask.size() + HalfSize, + 1)) + V2 = OriV2; + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VILVL (if possible). +/// +/// VILVL interleaves consecutive elements from the right (lowest-indexed) half +/// of each vector. +/// +/// It is possible to lower into VILVL when the mask consists of two of the +/// following forms interleaved: +/// <0, 1, 2, ...> +/// +/// where n is the number of elements in the vector. +/// For example: +/// <0, 0, 1, 1, 2, 2, ...> +/// <0, n, 1, n+1, 2, n+2, ...> +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &End = Mask.end(); + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 2, End, 0, 1)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 2, End, Mask.size(), 1)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Begin + 1, 2, End, 0, 1)) + V2 = OriV1; + else if (fitsRegularPattern(Begin + 1, 2, End, Mask.size(), 1)) + V2 = OriV2; + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VPICKEV (if possible). 
+/// +/// VPICKEV copies the even elements of each vector into the result vector. +/// +/// It is possible to lower into VPICKEV when the mask consists of two of the +/// following forms concatenated: +/// <0, 2, 4, ...> +/// +/// where n is the number of elements in the vector. +/// For example: +/// <0, 2, 4, ..., 0, 2, 4, ...> +/// <0, 2, 4, ..., n, n+2, n+4, ...> +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &Mid = Mask.begin() + Mask.size() / 2; + const auto &End = Mask.end(); + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 1, Mid, 0, 2)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 1, Mid, Mask.size(), 2)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Mid, 1, End, 0, 2)) + V2 = OriV1; + else if (fitsRegularPattern(Mid, 1, End, Mask.size(), 2)) + V2 = OriV2; + + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VPICKOD (if possible). +/// +/// VPICKOD copies the odd elements of each vector into the result vector. +/// +/// It is possible to lower into VPICKOD when the mask consists of two of the +/// following forms concatenated: +/// <1, 3, 5, ...> +/// +/// where n is the number of elements in the vector. +/// For example: +/// <1, 3, 5, ..., 1, 3, 5, ...> +/// <1, 3, 5, ..., n+1, n+3, n+5, ...> +/// +/// When undef's appear in the mask they are treated as if they were whatever +/// value is necessary in order to fit the above forms. 
+static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + const auto &Begin = Mask.begin(); + const auto &Mid = Mask.begin() + Mask.size() / 2; + const auto &End = Mask.end(); + SDValue OriV1 = V1, OriV2 = V2; + + if (fitsRegularPattern(Begin, 1, Mid, 1, 2)) + V1 = OriV1; + else if (fitsRegularPattern(Begin, 1, Mid, Mask.size() + 1, 2)) + V1 = OriV2; + else + return SDValue(); + + if (fitsRegularPattern(Mid, 1, End, 1, 2)) + V2 = OriV1; + else if (fitsRegularPattern(Mid, 1, End, Mask.size() + 1, 2)) + V2 = OriV2; + else + return SDValue(); + + return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1); +} + +/// Lower VECTOR_SHUFFLE into VSHUF. +/// +/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and +/// adding it as an operand to the resulting VSHUF. +static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + + SmallVector Ops; + for (auto M : Mask) + Ops.push_back(DAG.getConstant(M, DL, MVT::i64)); + + EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); + SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); + + // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. + // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> + // VSHF concatenates the vectors in a bitwise fashion: + // <0b00, 0b01> + <0b10, 0b11> -> + // 0b0100 + 0b1110 -> 0b01001110 + // <0b10, 0b11, 0b00, 0b01> + // We must therefore swap the operands to get the correct result. + return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1); +} + +/// Dispatching routine to lower various 128-bit LoongArch vector shuffles. +/// +/// This routine breaks down the specific type of 128-bit shuffle and +/// dispatches to the lowering routines accordingly. 
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, SelectionDAG &DAG) { + assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || + VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || + VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && + "Vector type is unsupported for lsx!"); + assert(V1.getSimpleValueType() == V2.getSimpleValueType() && + "Two operands have different types!"); + assert(VT.getVectorNumElements() == Mask.size() && + "Unexpected mask size for shuffle!"); + assert(Mask.size() % 2 == 0 && "Expected even mask size."); + + SDValue Result; + // TODO: Add more comparison patterns. + if (V2.isUndef()) { + if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) + return Result; + + // TODO: This comment may be enabled in the future to better match the + // pattern for instruction selection. + /* V2 = V1; */ + } + + // It is recommended not to change the pattern comparison order for better + // performance. + if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) + return Result; + + return SDValue(); +} + +/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). +/// +/// It is a XVREPLVEI when the mask is: +/// +/// where the number of x is equal to n and n is half the length of vector. 
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
+                                             ArrayRef<int> Mask, MVT VT,
+                                             SDValue V1, SDValue V2,
+                                             SelectionDAG &DAG) {
+  // The splat index is the first mask element that is not undef (-1).
+  int SplatIndex = -1;
+  for (const auto &M : Mask) {
+    if (M != -1) {
+      SplatIndex = M;
+      break;
+    }
+  }
+
+  // Every element is undef, so the result is undef as well.
+  if (SplatIndex == -1)
+    return DAG.getUNDEF(VT);
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+
+  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
+  // The first half must repeat SplatIndex and the second half must repeat
+  // SplatIndex + HalfSize (an index stride of 0 means "the same value").
+  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
+                              0)) {
+    APInt Imm(64, SplatIndex);
+    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
+                       DAG.getConstant(Imm, DL, MVT::i64));
+  }
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // When the size is less than or equal to 4, lower cost instructions may be
+  // used.
+  if (Mask.size() <= 4)
+    return SDValue();
+  // Larger masks reuse the 128-bit matcher unchanged.
+  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // Forwards to the 128-bit matcher with the same arguments.
+  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // Forwards to the 128-bit matcher with the same arguments.
+  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+  // Returns a VILVH node when the mask matches the patterns tested below,
+  // otherwise an empty SDValue. Each half of the mask is checked on its own.
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  unsigned LeftSize = HalfSize / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  // Even mask positions: consecutive indices starting at HalfSize - LeftSize
+  // in the first half and HalfSize + LeftSize in the second half, taken from
+  // the first input, or the same offset by Mask.size() for the second input.
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
+                              1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  // Odd mask positions: the same test shifted by one element.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
+                              1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
+                              1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  // The node takes its operands in swapped order (V2 first).
+  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+  // Returns a VILVL node when the mask matches the patterns tested below,
+  // otherwise an empty SDValue. Each half of the mask is checked on its own.
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  // Even mask positions: consecutive indices starting at 0 in the first half
+  // and at HalfSize in the second half (first input), or the same offset by
+  // Mask.size() (second input).
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  // Odd mask positions: the same test shifted by one element.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  // The node takes its operands in swapped order (V2 first).
+  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // Returns a VPICKEV node when the mask matches the patterns tested below,
+  // otherwise an empty SDValue. The mask is split into four quarters:
+  // [Begin, LeftMid), [LeftMid, Mid), [Mid, RightMid), [RightMid, End).
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  // First quarter of each half: indices advancing by 2 from 0 and from
+  // HalfSize (first input), or the same offset by Mask.size() (second input).
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  // Second quarter of each half: the same test, selecting the other operand.
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  // The node takes its operands in swapped order (V2 first).
+  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // Returns a VPICKOD node when the mask matches the patterns tested below,
+  // otherwise an empty SDValue. The mask is split into four quarters:
+  // [Begin, LeftMid), [LeftMid, Mid), [Mid, RightMid), [RightMid, End).
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  // First quarter of each half: indices advancing by 2 from 1 and from
+  // HalfSize + 1 (first input), or the same offset by Mask.size() (second
+  // input).
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
+                                   2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  // Second quarter of each half: the same test, selecting the other operand.
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
+                                   2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  // The node takes its operands in swapped order (V2 first).
+  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+  // Returns a VSHUF node whose mask operand holds lane-relative indices, or an
+  // empty SDValue when some mask element reads from the other half.
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + HalfSize;
+  const auto &End = Mask.end();
+
+  // VECTOR_SHUFFLE concatenates the vectors:
+  //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
+  //  shuffling ->
+  //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
+  //
+  // XVSHUF is fed indices that are relative to each half of the result, so the
+  // loops below rebase every mask element into the range [0, MaskSize).
+  // NOTE(review): the original illustration for XVSHUF lost its vector
+  // literals before this review; restore it from the upstream patch.
+  SmallVector<SDValue, 32> MaskAlloc;
+
+  // First half of the result: only elements from the first half of V1
+  // ([0, HalfSize)) or the first half of V2 ([MaskSize, MaskSize + HalfSize))
+  // are accepted. The upper bound is exclusive: MaskSize + HalfSize itself
+  // belongs to the second half of V2 and would be rebased to MaskSize, which
+  // is outside the valid index range.
+  for (auto it = Begin; it < Mid; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= 0 && *it < HalfSize) ||
+             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
+      int M = *it < HalfSize ? *it : *it - HalfSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
+
+  // Second half of the result: only elements from the second half of V1
+  // ([HalfSize, MaskSize)) or the second half of V2
+  // ([MaskSize + HalfSize, 2 * MaskSize)) are accepted.
+  for (auto it = Mid; it < End; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= HalfSize && *it < MaskSize) ||
+             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
+      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
+
+  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
+  // The node takes the mask first, then the inputs in swapped order.
+  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
+}
+
+/// Shuffle vectors by lane to generate more optimized instructions.
+/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
+///
+/// Therefore, except for the following four cases, other cases are regarded
+/// as cross-lane shuffles, where optimization is relatively limited.
+///
+/// - Shuffle high, low lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
+/// - Shuffle low, high lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
+/// - Shuffle low, low lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
+/// - Shuffle high, high lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
+///
+/// The first case is the closest to LoongArch instructions and the other
+/// cases need to be converted to it for processing.
+///
+/// This function may modify V1, V2 and Mask
+static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
+                                            MutableArrayRef<int> Mask, MVT VT,
+                                            SDValue &V1, SDValue &V2,
+                                            SelectionDAG &DAG) {
+
+  enum HalfMaskType { HighLaneTy, LowLaneTy, None };
+
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+
+  HalfMaskType preMask = None, postMask = None;
+
+  // A mask element fits the "high lane" when it is undef or indexes the first
+  // half of either input, and the "low lane" when it is undef or indexes the
+  // second half of either input.
+  auto FitsHighLane = [&](int M) {
+    return M < 0 || (M >= 0 && M < HalfSize) ||
+           (M >= MaskSize && M < MaskSize + HalfSize);
+  };
+  auto FitsLowLane = [&](int M) {
+    return M < 0 || (M >= HalfSize && M < MaskSize) ||
+           (M >= MaskSize + HalfSize && M < MaskSize * 2);
+  };
+
+  // Classify each half of the mask; the high-lane test is tried first, so an
+  // all-undef half counts as HighLaneTy.
+  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, FitsHighLane))
+    preMask = HighLaneTy;
+  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, FitsLowLane))
+    preMask = LowLaneTy;
+
+  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), FitsHighLane))
+    postMask = HighLaneTy;
+  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), FitsLowLane))
+    postMask = LowLaneTy;
+
+  // Rewrite one operand with XVPERMI; the permute is done on a v4i64 view and
+  // the result is cast back to VT.
+  auto PermuteLanes = [&](SDValue V, unsigned Imm) {
+    V = DAG.getBitcast(MVT::v4i64, V);
+    V = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V,
+                    DAG.getConstant(Imm, DL, MVT::i64));
+    return DAG.getBitcast(VT, V);
+  };
+  // V1 is always rewritten; V2 only when it is not undef.
+  auto PermuteOperands = [&](unsigned Imm) {
+    V1 = PermuteLanes(V1, Imm);
+    if (!V2.isUndef())
+      V2 = PermuteLanes(V2, Imm);
+  };
+
+  // The pre-half of mask is high lane type, and the post-half of mask
+  // is low lane type, which is closest to the LoongArch instructions.
+  //
+  // Note: In the LoongArch architecture, the high lane of mask corresponds
+  // to the lower 128-bit of vector register, and the low lane of mask
+  // corresponds the higher 128-bit of vector register.
+  if (preMask == HighLaneTy && postMask == LowLaneTy) {
+    return;
+  }
+  if (preMask == LowLaneTy && postMask == HighLaneTy) {
+    // The mask fix-up below assumes this immediate exchanges the two halves
+    // of each operand, so both halves of the mask are rebased.
+    PermuteOperands(0b01001110);
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
+    // Only the first half of the mask is rebased here.
+    PermuteOperands(0b11101110);
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
+    // Only the second half of the mask is rebased here.
+    PermuteOperands(0b01000100);
+
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else { // cross-lane
+    return;
+  }
+}
+
+/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
+///
+/// This routine breaks down the specific type of 256-bit shuffle and
+/// dispatches to the lowering routines accordingly.
+static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
+          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
+          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
+         "Vector type is unsupported for lasx!");
+  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
+         "Two operands have different types!");
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Unexpected mask size for shuffle!");
+  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+  assert(Mask.size() >= 4 && "Mask size is less than 4.");
+
+  // canonicalize non cross-lane shuffle vector
+  // The mask is copied because canonicalization may rewrite it together with
+  // V1 and V2; every matcher below sees the rewritten values.
+  SmallVector<int> NewMask(Mask);
+  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
+
+  SDValue Result;
+  // TODO: Add more comparison patterns.
+  // The single-input lowerings are only attempted when V2 is undef.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+
+    // TODO: This comment may be enabled in the future to better match the
+    // pattern for instruction selection.
+    /* V2 = V1; */
+  }
+
+  // It is recommended not to change the pattern comparison order for better
+  // performance.
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  // TODO: custom shuffle.
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> OrigMask = SVOp->getMask();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  MVT VT = Op.getSimpleValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDLoc DL(Op);
+
+  bool V1IsUndef = V1.isUndef();
+  bool V2IsUndef = V2.isUndef();
+  if (V1IsUndef && V2IsUndef)
+    return DAG.getUNDEF(VT);
+
+  // When we create a shuffle node we put the UNDEF node to second operand,
+  // but in some cases the first operand may be transformed to UNDEF.
+  // In this case we should just commute the node.
+  if (V1IsUndef)
+    return DAG.getCommutedVectorShuffle(*SVOp);
+
+  // Check for non-undef masks pointing at an undef vector and make the masks
+  // undef as well. This makes it easier to match the shuffle based solely on
+  // the mask.
+  if (V2IsUndef &&
+      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
+    SmallVector<int, 8> NewMask(OrigMask);
+    for (int &M : NewMask)
+      if (M >= NumElements)
+        M = -1;
+    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  }
+
+  // Check for illegal shuffle mask element index values.
+  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
+  (void)MaskUpperLimit;
+  assert(llvm::all_of(OrigMask,
+                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
+         "Out of bounds shuffle index");
+
+  // For each vector width, delegate to a specialized lowering routine.
+  if (VT.is128BitVector())
+    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
+
+  if (VT.is256BitVector())
+    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
+
   return SDValue();
 }
 
@@ -3389,8 +4306,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
     // TODO: Add more target-dependent nodes later.
     NODE_NAME_CASE(CALL)
+    NODE_NAME_CASE(CALL_MEDIUM)
+    NODE_NAME_CASE(CALL_LARGE)
     NODE_NAME_CASE(RET)
     NODE_NAME_CASE(TAIL)
+    NODE_NAME_CASE(TAIL_MEDIUM)
+    NODE_NAME_CASE(TAIL_LARGE)
     NODE_NAME_CASE(SLL_W)
     NODE_NAME_CASE(SRA_W)
     NODE_NAME_CASE(SRL_W)
@@ -3435,6 +4356,16 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(MOVFCSR2GR)
     NODE_NAME_CASE(CACOP_D)
     NODE_NAME_CASE(CACOP_W)
+    NODE_NAME_CASE(VSHUF)
+    NODE_NAME_CASE(VPICKEV)
+    NODE_NAME_CASE(VPICKOD)
+    NODE_NAME_CASE(VPACKEV)
+    NODE_NAME_CASE(VPACKOD)
+    NODE_NAME_CASE(VILVL)
+    NODE_NAME_CASE(VILVH)
+    NODE_NAME_CASE(VSHUF4I)
+    NODE_NAME_CASE(VREPLVEI)
+    NODE_NAME_CASE(XVPERMI)
     NODE_NAME_CASE(VPICK_SEXT_ELT)
     NODE_NAME_CASE(VPICK_ZEXT_ELT)
     NODE_NAME_CASE(VREPLVE)
@@ -4248,15 +5179,31 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned Op; + switch (DAG.getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + case CodeModel::Small: + Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; + break; + case CodeModel::Medium: + assert(Subtarget.is64Bit() && "Medium code model requires LA64"); + Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; + break; + case CodeModel::Large: + assert(Subtarget.is64Bit() && "Large code model requires LA64"); + Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; + break; + } if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); return Ret; } - Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(Op, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 2c9826a13237b4bb5992c78d8cbd5ed68f933632..a5ee740c1261eefd7246b2bf822b9b338691ff35 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -28,8 +28,12 @@ enum NodeType : unsigned { // TODO: add more LoongArchISDs CALL, + CALL_MEDIUM, + CALL_LARGE, RET, TAIL, + TAIL_MEDIUM, + TAIL_LARGE, // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. 
@@ -113,6 +117,16 @@ enum NodeType : unsigned { // Vector Shuffle VREPLVE, + VSHUF, + VPICKEV, + VPICKOD, + VPACKEV, + VPACKOD, + VILVL, + VILVH, + VSHUF4I, + VREPLVEI, + XVPERMI, // Extended vector element extraction VPICK_SEXT_ELT, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ab189055681498eedfbfc91879d1efd7606e332c..756c460f916b71e800890c05ef32e2f951d3109d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -69,6 +69,18 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def loongarch_call_medium : SDNode<"LoongArchISD::CALL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_medium : SDNode<"LoongArchISD::TAIL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_call_large : SDNode<"LoongArchISD::CALL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; @@ -351,6 +363,10 @@ def simm20_lu32id : SImm20Operand { let ParserMatchClass = SImmAsmOperand<20, "lu32id">; } +def simm20_pcaddu18i : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">; +} + def simm21_lsl2 : Operand { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr<2>"; @@ -618,15 +634,24 @@ class AM_3R op> : Fmt3R; -let hasSideEffects = 0, mayLoad = 1, mayStore 
= 0 in +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { class LLBase op> : Fmt2RI14; +class LLBase_ACQ op> + : Fmt2R; +} -let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in +let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in { class SCBase op> : Fmt2RI14; +class SCBase_128 op> + : Fmt3R; +class SCBase_REL op> + : Fmt2R; +} let hasSideEffects = 1 in class IOCSRRD op> @@ -738,6 +763,8 @@ def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), // Atomic Memory Access Instructions def LL_W : LLBase<0x20000000>; def SC_W : SCBase<0x21000000>; +def LLACQ_W : LLBase_ACQ<0x38578000>; +def SCREL_W : SCBase_REL<0x38578400>; // Barrier Instructions def DBAR : MISC_I15<0x38720000>; @@ -772,7 +799,7 @@ def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst), "$rd, $imm20">; } def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>; -def PCADDU18I : ALU_1RI20<0x1e000000, simm20>; +def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>; def MUL_D : ALU_3R<0x001d8000>; def MULH_D : ALU_3R<0x001e0000>; def MULH_DU : ALU_3R<0x001e8000>; @@ -859,8 +886,12 @@ def STLE_W : STORE_3R<0x387f0000>; def STLE_D : STORE_3R<0x387f8000>; // Atomic Memory Access Instructions for 64-bits +def AMSWAP_B : AM_3R<0x385c0000>; +def AMSWAP_H : AM_3R<0x385c8000>; def AMSWAP_W : AM_3R<0x38600000>; def AMSWAP_D : AM_3R<0x38608000>; +def AMADD_B : AM_3R<0x385d0000>; +def AMADD_H : AM_3R<0x385d8000>; def AMADD_W : AM_3R<0x38610000>; def AMADD_D : AM_3R<0x38618000>; def AMAND_W : AM_3R<0x38620000>; @@ -877,8 +908,12 @@ def AMMAX_WU : AM_3R<0x38670000>; def AMMAX_DU : AM_3R<0x38678000>; def AMMIN_WU : AM_3R<0x38680000>; def AMMIN_DU : AM_3R<0x38688000>; +def AMSWAP__DB_B : AM_3R<0x385e0000>; +def AMSWAP__DB_H : AM_3R<0x385e8000>; def AMSWAP__DB_W : AM_3R<0x38690000>; def AMSWAP__DB_D : AM_3R<0x38698000>; +def AMADD__DB_B : AM_3R<0x385f0000>; +def AMADD__DB_H : AM_3R<0x385f8000>; def AMADD__DB_W : AM_3R<0x386a0000>; def AMADD__DB_D : 
AM_3R<0x386a8000>; def AMAND__DB_W : AM_3R<0x386b0000>; @@ -895,8 +930,19 @@ def AMMAX__DB_WU : AM_3R<0x38700000>; def AMMAX__DB_DU : AM_3R<0x38708000>; def AMMIN__DB_WU : AM_3R<0x38710000>; def AMMIN__DB_DU : AM_3R<0x38718000>; +def AMCAS_B : AM_3R<0x38580000>; +def AMCAS_H : AM_3R<0x38588000>; +def AMCAS_W : AM_3R<0x38590000>; +def AMCAS_D : AM_3R<0x38598000>; +def AMCAS__DB_B : AM_3R<0x385a0000>; +def AMCAS__DB_H : AM_3R<0x385a8000>; +def AMCAS__DB_W : AM_3R<0x385b0000>; +def AMCAS__DB_D : AM_3R<0x385b8000>; def LL_D : LLBase<0x22000000>; def SC_D : SCBase<0x23000000>; +def SC_Q : SCBase_128<0x38570000>; +def LLACQ_D : LLBase_ACQ<0x38578800>; +def SCREL_D : SCBase_REL<0x38578C00>; // CRC Check Instructions def CRC_W_B_W : ALU_3R<0x00240000>; @@ -1323,16 +1369,43 @@ def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; +// Function call with 'Small' code model. let isCall = 1, Defs = [R1] in -def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; +def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; +// Function call with 'Medium' code model. +let isCall = 1, Defs = [R1, R20], Size = 8 in +def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium tglobaladdr:$func), + (PseudoCALL_MEDIUM tglobaladdr:$func)>; +def : Pat<(loongarch_call_medium texternalsym:$func), + (PseudoCALL_MEDIUM texternalsym:$func)>; +} // Predicates = [IsLA64] + +// Function call with 'Large' code model. 
+let isCall = 1, Defs = [R1, R20], Size = 24 in +def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_large tglobaladdr:$func), + (PseudoCALL_LARGE tglobaladdr:$func)>; +def : Pat<(loongarch_call_large texternalsym:$func), + (PseudoCALL_LARGE texternalsym:$func)>; +} // Predicates = [IsLA64] + let isCall = 1, Defs = [R1] in def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), [(loongarch_call GPR:$rj)]>, PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +def : Pat<(loongarch_call_large GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +} let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, @@ -1343,18 +1416,47 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; +// Tail call with 'Small' code model. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in -def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>; +def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), (PseudoTAIL tglobaladdr:$dst)>; def : Pat<(loongarch_tail (iPTR texternalsym:$dst)), (PseudoTAIL texternalsym:$dst)>; +// Tail call with 'Medium' code model. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [R3], Defs = [R20], Size = 8 in +def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_medium (iPTR tglobaladdr:$dst)), + (PseudoTAIL_MEDIUM tglobaladdr:$dst)>; +def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)), + (PseudoTAIL_MEDIUM texternalsym:$dst)>; +} // Predicates = [IsLA64] + +// Tail call with 'Large' code model. 
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [R3], Defs = [R19, R20], Size = 24 in +def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_large (iPTR tglobaladdr:$dst)), + (PseudoTAIL_LARGE tglobaladdr:$dst)>; +def : Pat<(loongarch_tail_large (iPTR texternalsym:$dst)), + (PseudoTAIL_LARGE texternalsym:$dst)>; +} // Predicates = [IsLA64] + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj), [(loongarch_tail GPRT:$rj)]>, PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>; +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_medium GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; +def : Pat<(loongarch_tail_large GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; +} let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in @@ -1367,6 +1469,19 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; +/// call36/taill36 macro instructions +let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1, + Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in +def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [], + "call36", "$dst">, + Requires<[IsLA64]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3], + isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0, + mayStore = 0, mayLoad = 0 in +def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [], + "tail36", "$tmp, $dst">, + Requires<[IsLA64]>; + /// Load address (la*) macro instructions. // Define isCodeGenOnly = 0 to expose them to tablegened assembly parser. 
@@ -1379,6 +1494,7 @@ def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), "la.abs", "$dst, $src">; def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.pcrel", "$dst, $src">; +let Defs = [R20], Size = 20 in def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.pcrel", "$dst, $tmp, $src">, @@ -1390,28 +1506,30 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in { def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.got", "$dst, $src">; +def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ie", "$dst, $src">; +def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ld", "$dst, $src">; +def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.gd", "$dst, $src">; +let Defs = [R20], Size = 20 in { def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.got", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ie", "$dst, $src">; def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ie", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ld", "$dst, $src">; def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ld", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.gd", "$dst, $src">; def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.gd", "$dst, $tmp, $src">, Requires<[IsLA64]>; +} // Defs = [R20], Size = 20 } // Load address inst alias: "la", "la.global" and "la.local". 
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 492b62da6ce7806dff589e3adc944a7397fddd83..6f1969bf8cae051e9d792e9609d8513c9233bbc3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_loongArchV1RUimm>; + def lasxsplati8 : PatFrag<(ops node:$e0), (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, @@ -771,6 +773,10 @@ def XVFRECIP_S : LASX2R_XX<0x769cf400>; def XVFRECIP_D : LASX2R_XX<0x769cf800>; def XVFRSQRT_S : LASX2R_XX<0x769d0400>; def XVFRSQRT_D : LASX2R_XX<0x769d0800>; +def XVFRECIPE_S : LASX2R_XX<0x769d1400>; +def XVFRECIPE_D : LASX2R_XX<0x769d1800>; +def XVFRSQRTE_S : LASX2R_XX<0x769d2400>; +def XVFRSQRTE_D : LASX2R_XX<0x769d2800>; def XVFCVTL_S_H : LASX2R_XX<0x769de800>; def XVFCVTH_S_H : LASX2R_XX<0x769dec00>; @@ -1571,6 +1577,134 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; +// XVSHUF_{B/H/W/D} +def : Pat<(loongarch_vshuf v32i8:$xa, v32i8:$xj, v32i8:$xk), + (XVSHUF_B v32i8:$xj, v32i8:$xk, v32i8:$xa)>; +def : Pat<(loongarch_vshuf v16i16:$xd, v16i16:$xj, v16i16:$xk), + (XVSHUF_H v16i16:$xd, v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vshuf v8i32:$xd, v8i32:$xj, v8i32:$xk), + (XVSHUF_W v8i32:$xd, v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vshuf v4i64:$xd, v4i64:$xj, v4i64:$xk), + (XVSHUF_D v4i64:$xd, v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vshuf v8i32:$xd, v8f32:$xj, v8f32:$xk), + (XVSHUF_W v8i32:$xd, v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vshuf v4i64:$xd, v4f64:$xj, v4f64:$xk), + (XVSHUF_D v4i64:$xd, v4f64:$xj, v4f64:$xk)>; + +// XVPICKEV_{B/H/W/D} +def : Pat<(loongarch_vpickev v32i8:$xj, v32i8:$xk), + (XVPICKEV_B 
v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpickev v16i16:$xj, v16i16:$xk), + (XVPICKEV_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpickev v8i32:$xj, v8i32:$xk), + (XVPICKEV_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpickev v4i64:$xj, v4i64:$xk), + (XVPICKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickev v8f32:$xj, v8f32:$xk), + (XVPICKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickev v4f64:$xj, v4f64:$xk), + (XVPICKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v32i8:$xj, v32i8:$xk), + (XVPICKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpickod v16i16:$xj, v16i16:$xk), + (XVPICKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpickod v8i32:$xj, v8i32:$xk), + (XVPICKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpickod v4i64:$xj, v4i64:$xk), + (XVPICKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickod v8f32:$xj, v8f32:$xk), + (XVPICKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickod v4f64:$xj, v4f64:$xk), + (XVPICKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v32i8:$xj, v32i8:$xk), + (XVPACKEV_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackev v16i16:$xj, v16i16:$xk), + (XVPACKEV_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackev v8i32:$xj, v8i32:$xk), + (XVPACKEV_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackev v4i64:$xj, v4i64:$xk), + (XVPACKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackev v8f32:$xj, v8f32:$xk), + (XVPACKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackev v4f64:$xj, v4f64:$xk), + (XVPACKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v32i8:$xj, v32i8:$xk), + (XVPACKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackod v16i16:$xj, v16i16:$xk), + (XVPACKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackod v8i32:$xj, v8i32:$xk), + (XVPACKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackod v4i64:$xj, v4i64:$xk), + 
(XVPACKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackod v8f32:$xj, v8f32:$xk), + (XVPACKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackod v4f64:$xj, v4f64:$xk), + (XVPACKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v32i8:$xj, v32i8:$xk), + (XVILVL_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvl v16i16:$xj, v16i16:$xk), + (XVILVL_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvl v8i32:$xj, v8i32:$xk), + (XVILVL_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvl v4i64:$xj, v4i64:$xk), + (XVILVL_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvl v8f32:$xj, v8f32:$xk), + (XVILVL_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvl v4f64:$xj, v4f64:$xk), + (XVILVL_D v4f64:$xj, v4f64:$xk)>; + +// XVILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v32i8:$xj, v32i8:$xk), + (XVILVH_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvh v16i16:$xj, v16i16:$xk), + (XVILVH_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvh v8i32:$xj, v8i32:$xk), + (XVILVH_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvh v4i64:$xj, v4i64:$xk), + (XVILVH_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvh v8f32:$xj, v8f32:$xk), + (XVILVH_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvh v4f64:$xj, v4f64:$xk), + (XVILVH_D v4f64:$xj, v4f64:$xk)>; + +// XVSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v32i8:$xj, immZExt8:$ui8), + (XVSHUF4I_B v32i8:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v16i16:$xj, immZExt8:$ui8), + (XVSHUF4I_H v16i16:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>; + +// XVREPL128VEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4), + (XVREPL128VEI_B v32i8:$xj, immZExt4:$ui4)>; +def : Pat<(loongarch_vreplvei v16i16:$xj, immZExt3:$ui3), + (XVREPL128VEI_H v16i16:$xj, immZExt3:$ui3)>; +def : 
Pat<(loongarch_vreplvei v8i32:$xj, immZExt2:$ui2), + (XVREPL128VEI_W v8i32:$xj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v4i64:$xj, immZExt1:$ui1), + (XVREPL128VEI_D v4i64:$xj, immZExt1:$ui1)>; +def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2), + (XVREPL128VEI_W v8f32:$xj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1), + (XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>; + +// XVPERMI_D +def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8), + (XVPERMI_D v4i64:$xj, immZExt8: $ui8)>; +def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8), + (XVPERMI_D v4f64:$xj, immZExt8: $ui8)>; + // XVREPLVE0_{W/D} def : Pat<(lasxsplatf32 FPR32:$fj), (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; @@ -1946,6 +2080,16 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)), (!cast<LAInst>(Inst) LASX256:$xj)>; +// 256-Bit vector FP approximate reciprocal operation +let Predicates = [HasFrecipe] in { +foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in + def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8f32 LASX256:$xj)), + (!cast<LAInst>(Inst) LASX256:$xj)>; +foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in + def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)), + (!cast<LAInst>(Inst) LASX256:$xj)>; +} + def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 99ac2f3c162fea31e076b5c267363e9fef7d0807..0580683c3ce303366cd5f18e93186b6cd907cf5a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>; def SDT_LoongArchVecCond : SDTypeProfile<1, 1, 
[SDTCisInt<0>, SDTCisVec<1>]>; +def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisInt<1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SDT_loongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>; + // Target nodes. def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", @@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; +def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>; +def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>; +def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>; +def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>; +def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>; +def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>; +def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>; + +def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>; +def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>; + +def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>; +def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>; +def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>; +def immZExt4 : ImmLeaf<i64, [{return isUInt<4>(Imm);}]>; +def immZExt8 : ImmLeaf<i64, [{return isUInt<8>(Imm);}]>; + class VecCond<SDPatternOperator OpNode, ValueType TyNode, RegisterClass RC = LSX128> : Pseudo<(outs GPR:$rd), (ins RC:$vj), @@ -892,6 +918,10 @@ def VFRECIP_S : LSX2R_VV<0x729cf400>; def VFRECIP_D : LSX2R_VV<0x729cf800>; def VFRSQRT_S : LSX2R_VV<0x729d0400>; def VFRSQRT_D : LSX2R_VV<0x729d0800>; +def VFRECIPE_S : LSX2R_VV<0x729d1400>; +def VFRECIPE_D : LSX2R_VV<0x729d1800>; +def VFRSQRTE_S : LSX2R_VV<0x729d2400>; +def VFRSQRTE_D : LSX2R_VV<0x729d2800>; def VFCVTL_S_H : 
LSX2R_VV<0x729de800>; def VFCVTH_S_H : LSX2R_VV<0x729dec00>; @@ -1678,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; +// VSHUF_{B/H/W/D} +def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk), + (VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>; +def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk), + (VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk), + (VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk), + (VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk), + (VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk), + (VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>; + +// VPICKEV_{B/H/W/D} +def : Pat<(loongarch_vpickev v16i8:$vj, v16i8:$vk), + (VPICKEV_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpickev v8i16:$vj, v8i16:$vk), + (VPICKEV_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpickev v4i32:$vj, v4i32:$vk), + (VPICKEV_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickev v2i64:$vj, v2i64:$vk), + (VPICKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickev v4f32:$vj, v4f32:$vk), + (VPICKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickev v2f64:$vj, v2f64:$vk), + (VPICKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v16i8:$vj, v16i8:$vk), + (VPICKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpickod v8i16:$vj, v8i16:$vk), + (VPICKOD_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpickod v4i32:$vj, v4i32:$vk), + (VPICKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickod v2i64:$vj, v2i64:$vk), + (VPICKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickod v4f32:$vj, v4f32:$vk), + (VPICKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickod v2f64:$vj, v2f64:$vk), + 
(VPICKOD_D v2f64:$vj, v2f64:$vk)>; + +// VPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v16i8:$vj, v16i8:$vk), + (VPACKEV_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackev v8i16:$vj, v8i16:$vk), + (VPACKEV_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpackev v4i32:$vj, v4i32:$vk), + (VPACKEV_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackev v2i64:$vj, v2i64:$vk), + (VPACKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackev v4f32:$vj, v4f32:$vk), + (VPACKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackev v2f64:$vj, v2f64:$vk), + (VPACKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v16i8:$vj, v16i8:$vk), + (VPACKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackod v8i16:$vj, v8i16:$vk), + (VPACKOD_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpackod v4i32:$vj, v4i32:$vk), + (VPACKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackod v2i64:$vj, v2i64:$vk), + (VPACKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackod v4f32:$vj, v4f32:$vk), + (VPACKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackod v2f64:$vj, v2f64:$vk), + (VPACKOD_D v2f64:$vj, v2f64:$vk)>; + +// VILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v16i8:$vj, v16i8:$vk), + (VILVL_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvl v8i16:$vj, v8i16:$vk), + (VILVL_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvl v4i32:$vj, v4i32:$vk), + (VILVL_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vilvl v2i64:$vj, v2i64:$vk), + (VILVL_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvl v4f32:$vj, v4f32:$vk), + (VILVL_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvl v2f64:$vj, v2f64:$vk), + (VILVL_D v2f64:$vj, v2f64:$vk)>; + +// VILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v16i8:$vj, v16i8:$vk), + (VILVH_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvh v8i16:$vj, v8i16:$vk), + (VILVH_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvh v4i32:$vj, v4i32:$vk), + (VILVH_W v4i32:$vj, v4i32:$vk)>; +def : 
Pat<(loongarch_vilvh v2i64:$vj, v2i64:$vk), + (VILVH_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvh v4f32:$vj, v4f32:$vk), + (VILVH_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvh v2f64:$vj, v2f64:$vk), + (VILVH_D v2f64:$vj, v2f64:$vk)>; + +// VSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8), + (VSHUF4I_B v16i8:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8), + (VSHUF4I_H v8i16:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>; + +// VREPLVEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4), + (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>; +def : Pat<(loongarch_vreplvei v8i16:$vj, immZExt3:$ui3), + (VREPLVEI_H v8i16:$vj, immZExt3:$ui3)>; +def : Pat<(loongarch_vreplvei v4i32:$vj, immZExt2:$ui2), + (VREPLVEI_W v4i32:$vj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v2i64:$vj, immZExt1:$ui1), + (VREPLVEI_D v2i64:$vj, immZExt1:$ui1)>; +def : Pat<(loongarch_vreplvei v4f32:$vj, immZExt2:$ui2), + (VREPLVEI_W v4f32:$vj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v2f64:$vj, immZExt1:$ui1), + (VREPLVEI_D v2f64:$vj, immZExt1:$ui1)>; + // VREPLVEI_{W/D} def : Pat<(lsxsplatf32 FPR32:$fj), (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; @@ -2043,6 +2195,16 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)), (!cast<LAInst>(Inst) LSX128:$vj)>; +// 128-Bit vector FP approximate reciprocal operation +let Predicates = [HasFrecipe] in { +foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in + def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4f32 LSX128:$vj)), + (!cast<LAInst>(Inst) LSX128:$vj)>; +foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in + def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)), + (!cast<LAInst>(Inst) LSX128:$vj)>; +} + // load def : Pat<(int_loongarch_lsx_vld 
GPR:$rj, timm:$imm), (VLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 5daa9481c9072e3507c9c92cd95fcdc3ea7ff17c..98ad49f25e3f2fddab1684ede35a93311cee023f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -95,6 +95,9 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, case LoongArchII::MO_GD_PC_HI: Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; break; + case LoongArchII::MO_CALL36: + Kind = LoongArchMCExpr::VK_LoongArch_CALL36; + break; // TODO: Handle more target-flags. } diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index 174e4cba83263348ee0564a3e81349790e708078..11c0b39e176e61ff32ce51aab0f77fe3c4460d4e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -45,6 +45,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { bool HasUAL = false; bool HasLinkerRelax = false; bool HasExpAutoVec = false; + bool HasFrecipe = false; unsigned GRLen = 32; MVT GRLenVT = MVT::i32; LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; @@ -104,6 +105,7 @@ public: bool hasUAL() const { return HasUAL; } bool hasLinkerRelax() const { return HasLinkerRelax; } bool hasExpAutoVec() const { return HasExpAutoVec; } + bool hasFrecipe() const { return HasFrecipe; } MVT getGRLenVT() const { return GRLenVT; } unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index d0a4e937504800ca2606706dc19054f25468f17d..0efc5e6ebb99d6109a121850b97df538ea66c3b1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ 
b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -63,11 +63,11 @@ getEffectiveLoongArchCodeModel(const Triple &TT, switch (*CM) { case CodeModel::Small: - case CodeModel::Medium: return *CM; + case CodeModel::Medium: case CodeModel::Large: if (!TT.isArch64Bit()) - report_fatal_error("Large code model requires LA64"); + report_fatal_error("Medium/Large code model requires LA64"); return *CM; default: report_fatal_error( diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index d47dded9ea6ecf260c773cd6ac9684ae8a191938..7961bb141e64997c59fd11c67fe957ec35e7fc2e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -26,8 +26,6 @@ TypeSize LoongArchTTIImpl::getRegisterBitWidth( case TargetTransformInfo::RGK_Scalar: return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); case TargetTransformInfo::RGK_FixedWidthVector: - if (!ST->hasExpAutoVec()) - return DefSize; if (ST->hasExtLASX()) return TypeSize::getFixed(256); if (ST->hasExtLSX()) diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h index cee6dad1f095e12feb6f12ebace67b3ea540e6a0..0692cb92b694404e16f299903d7797b10c2f79c1 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h @@ -47,6 +47,7 @@ enum { MO_IE_PC64_HI, MO_LD_PC_HI, MO_GD_PC_HI, + MO_CALL36 // TODO: Add more flags. 
}; } // end namespace LoongArchII diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index e60b9c2cfd97cc02dfbf71a7db7e7bdc11d5c250..0a52380dd2cdd1c4a097df7094d735c86e275124 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -90,6 +90,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_LARCH_TLS_LE64_LO20; case LoongArch::fixup_loongarch_tls_le64_hi12: return ELF::R_LARCH_TLS_LE64_HI12; + case LoongArch::fixup_loongarch_call36: + return ELF::R_LARCH_CALL36; // TODO: Handle more fixup-kinds. } } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h index 78414408f21f070a1205ad2645f4a20e32f1132d..0d19d2b0fb1fe8cf39744ac981a1fbd3aaadc16d 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -111,6 +111,9 @@ enum Fixups { fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, + // 36-bit fixup corresponding to %call36(foo) for a pair of instructions: + // pcaddu18i+jirl. 
+ fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, }; } // end namespace LoongArch } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 09d92ac9aa3a868da94f7c0a0e6027adef33c3d3..7c4fe9674d4e1f0d608ea470143034d81c0603cd 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -241,6 +241,9 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; break; + case LoongArchMCExpr::VK_LoongArch_CALL36: + FixupKind = LoongArch::fixup_loongarch_call36; + break; } } else if (Kind == MCExpr::SymbolRef && cast<MCSymbolRefExpr>(Expr)->getKind() == diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 82c992b1cc8c4e899d4c390eb700b63ad591eb26..8ca8876a19b936f60a4a381d1b47325d73fd8802 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -138,6 +138,8 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { return "gd_pc_hi20"; case VK_LoongArch_TLS_GD_HI20: return "gd_hi20"; + case VK_LoongArch_CALL36: + return "call36"; } } @@ -180,6 +182,7 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) + .Case("call36", VK_LoongArch_CALL36) .Default(VK_LoongArch_Invalid); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 93251f8241033b60e6fcdd42dd0dc27a2f757772..bd828116d7fa460f856d9760d832c806a9a69d0c 100644 --- 
a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -61,6 +61,7 @@ public: VK_LoongArch_TLS_LD_HI20, VK_LoongArch_TLS_GD_PC_HI20, VK_LoongArch_TLS_GD_HI20, + VK_LoongArch_CALL36, VK_LoongArch_Invalid // Must be the last item. }; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 8b23be02edc0145ee854c041985ab7db5928bedd..87e3e0b434d5bc0dd83ed06cbb545deea1b494d1 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1469,6 +1469,8 @@ StringRef sys::getHostCPUName() { switch (processor_id & 0xf000) { case 0xc000: // Loongson 64bit, 4-issue return "la464"; + case 0xd000: // Loongson 64bit, 6-issue + return "la664"; // TODO: Others. default: break; diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 772d24c5ce3deb95f539e0231f561c7a2322f119..8e86d18de2ad9a38548fdf9879a01a46958a362d 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -44,6 +44,17 @@ bool LoongArch::getArchFeatures(StringRef Arch, return true; } } + + if (Arch == "la64v1.0" || Arch == "la64v1.1") { + Features.push_back("+64bit"); + Features.push_back("+d"); + Features.push_back("+lsx"); + Features.push_back("+ual"); + if (Arch == "la64v1.1") + Features.push_back("+frecipe"); + return true; + } + return false; } diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll index c610f645a06aebe40c7e58ba00ba57600db6f0ee..f93c316709284bd1c2db408b46a3c87c5a946f5c 100644 --- a/llvm/test/CodeGen/LoongArch/code-models.ll +++ b/llvm/test/CodeGen/LoongArch/code-models.ll @@ -23,8 +23,8 @@ define i32 @call_globaladdress(i32 %a) nounwind { ; MEDIUM: # %bb.0: ; MEDIUM-NEXT: addi.d $sp, $sp, -16 ; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(callee) -; MEDIUM-NEXT: jirl 
$ra, $ra, %pc_lo12(callee) +; MEDIUM-NEXT: pcaddu18i $ra, %call36(callee) +; MEDIUM-NEXT: jirl $ra, $ra, 0 ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret @@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind { ; LARGE: # %bb.0: ; LARGE-NEXT: addi.d $sp, $sp, -16 ; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee) -; LARGE-NEXT: addi.d $ra, $zero, %got_pc_lo12(callee) -; LARGE-NEXT: lu32i.d $ra, %got64_pc_lo20(callee) -; LARGE-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(callee) -; LARGE-NEXT: ldx.d $ra, $ra, $a1 +; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee) +; LARGE-NEXT: ldx.d $ra, $t8, $ra ; LARGE-NEXT: jirl $ra, $ra, 0 ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LARGE-NEXT: addi.d $sp, $sp, 16 @@ -68,8 +68,8 @@ define void @call_external_sym(ptr %dst) { ; MEDIUM-NEXT: .cfi_offset 1, -8 ; MEDIUM-NEXT: ori $a2, $zero, 1000 ; MEDIUM-NEXT: move $a1, $zero -; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(memset) -; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(memset) +; MEDIUM-NEXT: pcaddu18i $ra, %call36(memset) +; MEDIUM-NEXT: jirl $ra, $ra, 0 ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret @@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) { ; LARGE-NEXT: .cfi_offset 1, -8 ; LARGE-NEXT: ori $a2, $zero, 1000 ; LARGE-NEXT: move $a1, $zero -; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset) -; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset) -; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset) -; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset) -; LARGE-NEXT: add.d $ra, $ra, $a3 +; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) +; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) +; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) +; 
LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) +; LARGE-NEXT: add.d $ra, $t8, $ra ; LARGE-NEXT: jirl $ra, $ra, 0 ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LARGE-NEXT: addi.d $sp, $sp, 16 @@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind { ; ; MEDIUM-LABEL: caller_tail: ; MEDIUM: # %bb.0: # %entry -; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) -; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) +; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail) +; MEDIUM-NEXT: jr $t8 ; ; LARGE-LABEL: caller_tail: ; LARGE: # %bb.0: # %entry -; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee_tail) -; LARGE-NEXT: addi.d $a2, $zero, %got_pc_lo12(callee_tail) -; LARGE-NEXT: lu32i.d $a2, %got64_pc_lo20(callee_tail) -; LARGE-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail) -; LARGE-NEXT: ldx.d $a1, $a2, $a1 -; LARGE-NEXT: jr $a1 +; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail) +; LARGE-NEXT: ldx.d $t7, $t8, $t7 +; LARGE-NEXT: jr $t7 entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll index 35945ae4de71fb5decca5c5516aecee09a8757f3..087cf887b81386e3d68d1ac5b052ac032e519e14 100644 --- a/llvm/test/CodeGen/LoongArch/cpus.ll +++ b/llvm/test/CodeGen/LoongArch/cpus.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s ; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s +; RUN: llc < %s --mtriple=loongarch64 --mcpu=la664 2>&1 | FileCheck %s ; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s ; CHECK-NOT: {{.*}} is not a recognized processor for this target @@ -18,3 +19,7 @@ define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { define void @tune_cpu_la464() "tune-cpu"="la464" { ret void } + +define void 
@tune_cpu_la664() "tune-cpu"="la664" { + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll index 86bf4292665b72c88b4ba5ab3cf056b9c327edda..e0d179f92de6824b28b328127b9303c948d17e3c 100644 --- a/llvm/test/CodeGen/LoongArch/expand-call.ll +++ b/llvm/test/CodeGen/LoongArch/expand-call.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=loongarch64 --stop-before loongarch-prera-expand-pseudo \ ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND -; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \ +; RUN: llc --mtriple=loongarch64 --stop-before machine-opt-remark-emitter \ ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND declare void @callee() diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll index a8f0ef648aa7c4372980e7c47f93df028bda034d..d32a17f488b1422df12553801fb0b170836b6738 100644 --- a/llvm/test/CodeGen/LoongArch/global-address.ll +++ b/llvm/test/CodeGen/LoongArch/global-address.ll @@ -53,32 +53,32 @@ define void @foo() nounwind { ; LA64LARGENOPIC-LABEL: foo: ; LA64LARGENOPIC: # %bb.0: ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) -; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) -; LA64LARGENOPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) -; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) -; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) -; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %pc_lo12(g) -; LA64LARGENOPIC-NEXT: lu32i.d $a1, %pc64_lo20(g) -; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g) -; LA64LARGENOPIC-NEXT: add.d $a0, $a1, $a0 +; LA64LARGENOPIC-NEXT: addi.d 
$t8, $zero, %pc_lo12(g) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0 ; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 ; LA64LARGENOPIC-NEXT: ret ; ; LA64LARGEPIC-LABEL: foo: ; LA64LARGEPIC: # %bb.0: ; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) -; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G) -; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G) -; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G) -; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 ; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) -; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %pc_lo12(.Lg$local) -; LA64LARGEPIC-NEXT: lu32i.d $a1, %pc64_lo20(.Lg$local) -; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(.Lg$local) -; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 ; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 ; LA64LARGEPIC-NEXT: ret %V = load volatile i32, ptr @G diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll new file mode 100644 index 0000000000000000000000000000000000000000..9f572500caa0ea20e39fc28c37c62b107e66cbc8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-dbl.ll @@ -0,0 +1,26 @@ +; RUN: llc --mtriple=loongarch32 --mattr=+d,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s + +declare double @llvm.loongarch.frecipe.d(double) + +define double @frecipe_d(double %a) { +; CHECK-LABEL: frecipe_d: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: frecipe.d $fa0, $fa0 +; CHECK-NEXT: ret +entry: + %res = call double @llvm.loongarch.frecipe.d(double %a) + ret double %res +} + +declare double @llvm.loongarch.frsqrte.d(double) + +define double @frsqrte_d(double %a) { +; CHECK-LABEL: frsqrte_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frsqrte.d $fa0, $fa0 +; CHECK-NEXT: ret +entry: + %res = call double @llvm.loongarch.frsqrte.d(double %a) + ret double %res +} diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll new file mode 100644 index 0000000000000000000000000000000000000000..0b2029f2e44a01c82b9346e603a954ce613c6dc6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/intrinsic-frecipe-flt.ll @@ -0,0 +1,26 @@ +; RUN: llc --mtriple=loongarch32 --mattr=+f,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+f,+frecipe < %s | FileCheck %s + +declare float @llvm.loongarch.frecipe.s(float) + +define float @frecipe_s(float %a) { +; CHECK-LABEL: frecipe_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frecipe.s $fa0, $fa0 +; CHECK-NEXT: ret +entry: + %res = call float @llvm.loongarch.frecipe.s(float %a) + ret float %res +} + +declare float @llvm.loongarch.frsqrte.s(float) + +define float @frsqrte_s(float %a) { +; CHECK-LABEL: frsqrte_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frsqrte.s $fa0, $fa0 +; CHECK-NEXT: ret +entry: + %res = call float @llvm.loongarch.frsqrte.s(float %a) + ret float %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll new file mode 100644 index 0000000000000000000000000000000000000000..215436823af8313d530f1c8ed27f734c43a790c5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s + +declare <8 x float> 
@llvm.loongarch.lasx.xvfrecipe.s(<8 x float>) + +define <8 x float> @lasx_xvfrecipe_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrecipe_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrecipe.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double>) + +define <4 x double> @lasx_xvfrecipe_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrecipe_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrecipe.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrecipe.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll new file mode 100644 index 0000000000000000000000000000000000000000..ad36c3aa5c29d8a568cfafc19968ed10be918851 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float>) + +define <8 x float> @lasx_xvfrsqrte_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrsqrte_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrsqrte.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double>) + +define <4 x double> @lasx_xvfrsqrte_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrsqrte_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrsqrte.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrte.d(<4 x double> %va) + ret <4 x double> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll new file mode 100644 index 0000000000000000000000000000000000000000..22ab19b9fa44674d836c4e162f9e7f508985e885 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvilvl.b +define <32 x i8> @shufflevector_xvilvl_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvilvl.h +define <16 x i16> @shufflevector_xvilvl_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvilvl.w +define <8 x i32> @shufflevector_xvilvl_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvilvh.b +define <32 x i8> @shufflevector_xvilvh_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvilvh.h +define <16 x i16> @shufflevector_xvilvh_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> 
+ ret <16 x i16> %c +} + +;; xvilvh.w +define <8 x i32> @shufflevector_xvilvh_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvilvh.w +define <8 x float> @shufflevector_xvilvh_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ff9af4069b9bd873fe72248fdfe5a4a5a1b80da --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvpackev.b +define <32 x i8> @shufflevector_pack_ev_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpackev.h +define <16 x i16> @shufflevector_pack_ev_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpackev.w +define <8 x i32> @shufflevector_pack_ev_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret 
+ %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickev.d/xvpackev.d/xvilvl.d +define <4 x i64> @shufflevector_pack_ev_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvpackev.w +define <8 x float> @shufflevector_pack_ev_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickev.d/xvpackev.d/xvilvl.d +define <4 x double> @shufflevector_pack_ev_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + +;; xvpackod.b +define <32 x i8> @shufflevector_pack_od_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_od_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpackod.h +define <16 x i16> @shufflevector_pack_od_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_od_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpackod.w +define <8 x i32> @shufflevector_pack_od_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_od_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; 
xvpickod.d/xvpackod.d/xvilvh.d +define <4 x i64> @shufflodector_pack_od_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvpackod.w +define <8 x float> @shufflodector_pack_od_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflodector_pack_od_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickod.d/xvpackod.d/xvilvh.d +define <4 x double> @shufflodector_pack_od_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll new file mode 100644 index 0000000000000000000000000000000000000000..294d292d1764067c80a79af83c6aeb87f6982d53 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvpickev.b +define <32 x i8> @shufflevector_pick_ev_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpickev.h +define <16 x i16> @shufflevector_pick_ev_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.h 
$xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpickev.w +define <8 x i32> @shufflevector_pick_ev_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickev.w +define <8 x float> @shufflevector_pick_ev_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickod.b +define <32 x i8> @shufflevector_pick_od_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_od_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpickod.h +define <16 x i16> @shufflevector_pick_od_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_od_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpickod.w +define <8 x i32> @shufflevector_pick_od_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_od_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickod.w +define <8 x float> @shufflodector_pick_od_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflodector_pick_od_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll new file mode 100644 index 0000000000000000000000000000000000000000..dce1e4b777e291c95482d441dcc14900f18e86bf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvrepl128vei.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvrepl128vei.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvrepl128vei.w +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78 +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvrepl128vei.d +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvrepl128vei.w +define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x 
float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvrepl128vei.d +define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr1, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll new file mode 100644 index 0000000000000000000000000000000000000000..fce32647da3de78a4de2635dba6b78496183e352 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvshuf.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvshuf.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 78 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvshuf.w +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d 
$xr2, $xr0, 68 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvshuf.d +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 238 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 238 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvshuf.w +define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 +; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll new file mode 100644 index 0000000000000000000000000000000000000000..dc4532a7292abb660a17bc031afa9372c066281a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xxvshuf4i.b +define <32 x i8> @shufflevector_xvshuf4i_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v32i8: +; CHECK: # %bb.0: +; 
CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvshuf4i.h +define <16 x i16> @shufflevector_xvshuf4i_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvshuf4i.w +define <8 x i32> @shufflevector_xvshuf4i_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvshuf4i.w +define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll new file mode 100644 index 0000000000000000000000000000000000000000..1b7a97d9f97209e6be3dfd7559fbfdccd3e10c65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float>) + +define <4 x float> @lsx_vfrecipe_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrecipe_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrecipe.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double>) + +define <2 x double> 
@lsx_vfrecipe_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrecipe_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrecipe.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrecipe.d(<2 x double> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll new file mode 100644 index 0000000000000000000000000000000000000000..3cd6c78e87d78ba92d1fcd99b3db91b75c3d90dc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float>) + +define <4 x float> @lsx_vfrsqrte_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrsqrte_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrsqrte.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double>) + +define <2 x double> @lsx_vfrsqrte_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrsqrte_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrsqrte.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrte.d(<2 x double> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll new file mode 100644 index 0000000000000000000000000000000000000000..31398c6081c0a9357ae3c700dc7ded86c1102355 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s 
+ +;; vilvl.b +define <16 x i8> @shufflevector_vilvl_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_vilvl_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vilvl.h +define <8 x i16> @shufflevector_vilvl_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_vilvl_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vilvl.w +define <4 x i32> @shufflevector_vilvl_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_vilvl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vilvl.w +define <4 x float> @shufflevector_vilvl_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_vilvl_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vilvh.b +define <16 x i8> @shufflevector_vilvh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_vilvh_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vilvh.h +define <8 x i16> @shufflevector_vilvh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_vilvh_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vilvh.w +define <4 x i32> @shufflevector_vilvh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_vilvh_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x 
i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vilvh.w +define <4 x float> @shufflevector_vilvh_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_vilvh_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll new file mode 100644 index 0000000000000000000000000000000000000000..171e68306cd11026bf5b422870136fcd7e0b5e81 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vpackev.b +define <16 x i8> @shufflevector_pack_ev_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpackev.h +define <8 x i16> @shufflevector_pack_ev_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpackev.w +define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, 
$vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vpackev.w +define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} + +;; vpackod.b +define <16 x i8> @shufflevector_pack_od_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_od_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpackod.h +define <8 x i16> @shufflevector_pack_od_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_od_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpackod.w +define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_od_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickod.d/vpackod.d/vilvh.d +define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflodector_pack_od_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vpackod.w +define <4 
x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vpickod.d/vpackod.d/vilvh.d +define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflodector_pack_od_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll new file mode 100644 index 0000000000000000000000000000000000000000..ca636d942b583814f7bae973dd03500690b9f719 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vpickev.b +define <16 x i8> @shufflevector_pick_ev_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickev.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpickev.h +define <8 x i16> @shufflevector_pick_ev_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickev.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpickev.w +define <4 x i32> @shufflevector_pick_ev_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x 
i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickev.w +define <4 x float> @shufflevector_pick_ev_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vpickod.b +define <16 x i8> @shufflevector_pick_od_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_od_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickod.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vpickod.h +define <8 x i16> @shufflevector_pick_od_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_od_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickod.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vpickod.w +define <4 x i32> @shufflevector_pick_od_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_od_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vpickod.w +define <4 x float> @shufflodector_pick_od_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflodector_pick_od_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpickod.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll new file mode 100644 index 0000000000000000000000000000000000000000..10510786f3216287237c598ea8042ca853b6a06a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vreplvei.b +define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.b $vr0, $vr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vreplvei.h +define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.h $vr0, $vr1, 2 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vreplvei.w +define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vreplvei.d +define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vreplvei.w +define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vreplvei.d +define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll new 
file mode 100644 index 0000000000000000000000000000000000000000..55800b31446b3d82a5f7efab3483a8f9d2ebb481 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vshuf.h +define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vshuf.w +define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vshuf.d +define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0 +; 
CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vshuf.w +define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vshuf.d +define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll new file mode 100644 index 0000000000000000000000000000000000000000..660b9581c3d1f555b358afd67e2bf664c454355b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vilvh.b +define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_vshuf4i_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vilvh.h +define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: 
shufflevector_vshuf4i_v8i4: +; CHECK: # %bb.0: +; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vilvh.w +define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_vshuf4i_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vilvh.w +define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_vshuf4i_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll new file mode 100644 index 0000000000000000000000000000000000000000..474436a0126b91eb03e71d494a77d518e8fff6cc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=0 < %s \ +; RUN: | FileCheck %s --check-prefix=MEDIUM_NO_SCH +; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=1 < %s \ +; RUN: | FileCheck %s --check-prefix=MEDIUM_SCH +; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=0 < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE_NO_SCH +; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \ +; RUN: | FileCheck %s --check-prefix=LARGE_SCH + +@g = dso_local global i64 zeroinitializer, align 4 +@G = global i64 zeroinitializer, align 4 +@gd = external thread_local global i64 +@ld = external thread_local(localdynamic) global i64 +@ie = external thread_local(initialexec) global 
i64 + +declare ptr @bar(i64) + +define void @foo() nounwind { +; MEDIUM_NO_SCH-LABEL: foo: +; MEDIUM_NO_SCH: # %bb.0: +; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, -16 +; MEDIUM_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; MEDIUM_NO_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_NO_SCH-NEXT: ori $a0, $zero, 1 +; MEDIUM_NO_SCH-NEXT: pcaddu18i $ra, %call36(bar) +; MEDIUM_NO_SCH-NEXT: jirl $ra, $ra, 0 +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, 16 +; MEDIUM_NO_SCH-NEXT: ret +; +; MEDIUM_SCH-LABEL: foo: +; MEDIUM_SCH: # %bb.0: +; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 +; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 +; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) +; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; MEDIUM_SCH-NEXT: ld.d 
$a0, $a0, %ie_pc_lo12(ld) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM_SCH-NEXT: addi.d $sp, $sp, 16 +; MEDIUM_SCH-NEXT: ret +; +; LARGE_NO_SCH-LABEL: foo: +; LARGE_NO_SCH: # %bb.0: +; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 +; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 
+; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 +; LARGE_NO_SCH-NEXT: ret +; +; LARGE_SCH-LABEL: foo: +; LARGE_SCH: # %bb.0: +; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 +; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) +; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_SCH-NEXT: ori $a0, $zero, 1 +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra +; LARGE_SCH-NEXT: jirl $ra, $ra, 0 +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, 
%ie64_pc_hi12(ld) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE_SCH-NEXT: addi.d $sp, $sp, 16 +; LARGE_SCH-NEXT: ret + %V = load volatile i64, ptr @G + %v = load volatile i64, ptr @g + call void @bar(i64 1) + %v_gd = load volatile i64, ptr @gd + %v_ld = load volatile i64, ptr @ld + %v_ie = load volatile i64, ptr @ie + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll index a2a3792a6a54bed7faf1038131d76a7ab7d8f18f..3994df1da7163f1e2049b52d37f5d201ac18ef0c 100644 --- a/llvm/test/CodeGen/LoongArch/tls-models.ll +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -45,15 +45,15 @@ define ptr @f1() nounwind { ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; LA64LARGEPIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) -; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(unspecified) -; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(unspecified) -; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(unspecified) -; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) -; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(unspecified) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 
+; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 @@ -76,10 +76,10 @@ define ptr @f1() nounwind { ; LA64LARGENOPIC-LABEL: f1: ; LA64LARGENOPIC: # %bb.0: # %entry ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) -; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(unspecified) -; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(unspecified) -; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(unspecified) -; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(unspecified) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(unspecified) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(unspecified) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp ; LA64LARGENOPIC-NEXT: ret entry: @@ -116,15 +116,15 @@ define ptr @f2() nounwind { ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 ; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) -; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(ld) -; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(ld) -; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(ld) -; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) -; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(ld) +; LA64LARGEPIC-NEXT: lu32i.d $t8, 
%got64_pc_lo20(ld) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 @@ -147,10 +147,10 @@ define ptr @f2() nounwind { ; LA64LARGENOPIC-LABEL: f2: ; LA64LARGENOPIC: # %bb.0: # %entry ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) -; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ld) -; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ld) -; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ld) -; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp ; LA64LARGENOPIC-NEXT: ret entry: @@ -177,10 +177,10 @@ define ptr @f3() nounwind { ; LA64LARGEPIC-LABEL: f3: ; LA64LARGEPIC: # %bb.0: # %entry ; LA64LARGEPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) -; LA64LARGEPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) -; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) -; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp ; LA64LARGEPIC-NEXT: ret ; @@ -201,10 +201,10 @@ define ptr @f3() nounwind { ; LA64LARGENOPIC-LABEL: f3: ; LA64LARGENOPIC: # %bb.0: # 
%entry ; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) -; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %ie_pc_lo12(ie) -; LA64LARGENOPIC-NEXT: lu32i.d $a1, %ie64_pc_lo20(ie) -; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(ie) -; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0 +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 ; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp ; LA64LARGENOPIC-NEXT: ret entry: diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s index 6b2c67e9a2cc174dd03d5f43503fe60215c6aa6a..8e19d2e34f3c5a240831f69965b6cf1a0237f90c 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s @@ -78,10 +78,18 @@ fsqrt.d $fa2, $ft3 # ASM: encoding: [0x7b,0x5b,0x14,0x01] frecip.d $fs3, $fs3 +# ASM-AND-OBJ: frecipe.d $fa0, $fa0 +# ASM: encoding: [0x00,0x78,0x14,0x01] +frecipe.d $fa0, $fa0 + # ASM-AND-OBJ: frsqrt.d $ft14, $fa3 # ASM: encoding: [0x76,0x68,0x14,0x01] frsqrt.d $ft14, $fa3 +# ASM-AND-OBJ: frsqrte.d $fa1, $fa1 +# ASM: encoding: [0x21,0x88,0x14,0x01] +frsqrte.d $fa1, $fa1 + # ASM-AND-OBJ: fscaleb.d $ft4, $ft6, $fs2 # ASM: encoding: [0xcc,0x69,0x11,0x01] fscaleb.d $ft4, $ft6, $fs2 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s index 155e783cf4350526cc0bf9056bdf1580b19a2b67..c32151adbf3b30ca782e9e99a44de7a1240a7775 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s @@ -73,10 +73,18 @@ fsqrt.s $fs3, $ft10 # ASM: encoding: [0x71,0x57,0x14,0x01] frecip.s $ft9, $fs3 +# ASM-AND-OBJ: frecipe.s $fa0, $fa0 +# ASM: encoding: [0x00,0x74,0x14,0x01] +frecipe.s $fa0, $fa0 + # ASM-AND-OBJ: frsqrt.s $fs1, $ft4 # ASM: encoding: [0x99,0x65,0x14,0x01] frsqrt.s $fs1, $ft4 +# ASM-AND-OBJ: 
frsqrte.s $fa1, $fa1 +# ASM: encoding: [0x21,0x84,0x14,0x01] +frsqrte.s $fa1, $fa1 + # ASM-AND-OBJ: fscaleb.s $ft13, $ft15, $fa6 # ASM: encoding: [0xf5,0x9a,0x10,0x01] fscaleb.s $ft13, $ft15, $fa6 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s index a35211db885141ac8281cc5506baa99060e8de14..69acdeef935ccc362bd2aabe2e345851f5ec2a21 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s @@ -21,6 +21,14 @@ ll.w $tp, $s4, 220 # CHECK-ASM: encoding: [0xd3,0x39,0x00,0x21] sc.w $t7, $t2, 56 +# CHECK-ASM-AND-OBJ: llacq.w $t1, $t2 +# CHECK-ASM: encoding: [0xcd,0x81,0x57,0x38] +llacq.w $t1, $t2 + +# CHECK-ASM-AND-OBJ: screl.w $t1, $t2 +# CHECK-ASM: encoding: [0xcd,0x85,0x57,0x38] +screl.w $t1, $t2 + ############################################################# @@ -29,6 +37,14 @@ sc.w $t7, $t2, 56 .ifdef LA64 +# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] +amswap.b $a2, $t0, $s1, 0 + +# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] +amswap.h $a2, $t0, $s1, 0 + # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] amswap.w $a2, $t0, $s1, 0 @@ -41,6 +57,14 @@ amswap.w $zero, $t0, $zero # CHECK64-ASM: encoding: [0xa0,0x00,0x6a,0x38] amadd_db.w $zero, $zero, $a1 +# CHECK64-ASM-AND-OBJ: amswap.b $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0x33,0x5c,0x38] +amswap.b $a2, $t0, $s1 + +# CHECK64-ASM-AND-OBJ: amswap.h $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0xb3,0x5c,0x38] +amswap.h $a2, $t0, $s1 + # CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] amswap.w $a2, $t0, $s1 @@ -49,6 +73,14 @@ amswap.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0xc2,0xba,0x60,0x38] amswap.d $tp, $t2, $fp +# CHECK64-ASM-AND-OBJ: amadd.b $a4, $t0, $r21 +# CHECK64-ASM: encoding: [0xa8,0x32,0x5d,0x38] +amadd.b $a4, $t0, 
$r21 + +# CHECK64-ASM-AND-OBJ: amadd.h $a1, $t5, $s6 +# CHECK64-ASM: encoding: [0xa5,0xc7,0x5d,0x38] +amadd.h $a1, $t5, $s6 + # CHECK64-ASM-AND-OBJ: amadd.w $a4, $t0, $r21 # CHECK64-ASM: encoding: [0xa8,0x32,0x61,0x38] amadd.w $a4, $t0, $r21 @@ -113,6 +145,14 @@ ammin.wu $a4, $t6, $s7 # CHECK64-ASM: encoding: [0x27,0xc3,0x68,0x38] ammin.du $a3, $t4, $s2 +# CHECK64-ASM-AND-OBJ: amswap_db.b $a2, $t0, $s1 +# CHECK64-ASM: encoding: [0x06,0x33,0x5e,0x38] +amswap_db.b $a2, $t0, $s1 + +# CHECK64-ASM-AND-OBJ: amswap_db.h $tp, $t2, $fp +# CHECK64-ASM: encoding: [0xc2,0xba,0x5e,0x38] +amswap_db.h $tp, $t2, $fp + # CHECK64-ASM-AND-OBJ: amswap_db.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0x06,0x33,0x69,0x38] amswap_db.w $a2, $t0, $s1 @@ -121,6 +161,14 @@ amswap_db.w $a2, $t0, $s1 # CHECK64-ASM: encoding: [0xc2,0xba,0x69,0x38] amswap_db.d $tp, $t2, $fp +# CHECK64-ASM-AND-OBJ: amadd_db.b $zero, $zero, $a1 +# CHECK64-ASM: encoding: [0xa0,0x00,0x5f,0x38] +amadd_db.b $zero, $zero, $a1 + +# CHECK64-ASM-AND-OBJ: amadd_db.h $a4, $t0, $r21 +# CHECK64-ASM: encoding: [0xa8,0xb2,0x5f,0x38] +amadd_db.h $a4, $t0, $r21 + # CHECK64-ASM-AND-OBJ: amadd_db.w $a4, $t0, $r21 # CHECK64-ASM: encoding: [0xa8,0x32,0x6a,0x38] amadd_db.w $a4, $t0, $r21 @@ -185,6 +233,38 @@ ammin_db.wu $a4, $t6, $s7 # CHECK64-ASM: encoding: [0x27,0xc3,0x71,0x38] ammin_db.du $a3, $t4, $s2 +# CHECK64-ASM-AND-OBJ: amcas.b $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0x39,0x58,0x38] +amcas.b $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas.h $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0xb9,0x58,0x38] +amcas.h $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas.w $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0x39,0x59,0x38] +amcas.w $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas.d $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0xb9,0x59,0x38] +amcas.d $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas_db.b $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0x39,0x5a,0x38] +amcas_db.b $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas_db.h $t1, $t2, 
$t3 +# CHECK64-ASM: encoding: [0xed,0xb9,0x5a,0x38] +amcas_db.h $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas_db.w $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0x39,0x5b,0x38] +amcas_db.w $t1, $t2, $t3 + +# CHECK64-ASM-AND-OBJ: amcas_db.d $t1, $t2, $t3 +# CHECK64-ASM: encoding: [0xed,0xb9,0x5b,0x38] +amcas_db.d $t1, $t2, $t3 + # CHECK64-ASM-AND-OBJ: ll.d $s2, $s4, 16 # CHECK64-ASM: encoding: [0x79,0x13,0x00,0x22] ll.d $s2, $s4, 16 @@ -193,5 +273,17 @@ ll.d $s2, $s4, 16 # CHECK64-ASM: encoding: [0x31,0xf6,0x00,0x23] sc.d $t5, $t5, 244 +# CHECK64-ASM-AND-OBJ: sc.q $t7, $t2, $t5 +# CHECK64-ASM: encoding: [0x33,0x3a,0x57,0x38] +sc.q $t7, $t2, $t5 + +# CHECK64-ASM-AND-OBJ: llacq.d $t1, $t2 +# CHECK64-ASM: encoding: [0xcd,0x89,0x57,0x38] +llacq.d $t1, $t2 + +# CHECK64-ASM-AND-OBJ: screl.d $t1, $t2 +# CHECK64-ASM: encoding: [0xcd,0x8d,0x57,0x38] +screl.d $t1, $t2 + .endif diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s index acddca9432a698aa30f7bff95dd60b7417edd72f..1c1c658ad440f83141b32e3290a34f86f2f0fc6f 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s @@ -65,7 +65,7 @@ addu16i.d $a0, $a0, 32768 ## simm20 pcaddu18i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%call36) or an integer in the range [-524288, 524287] ## simm20_lu32id lu32i.d $a0, 0x80000 diff --git a/llvm/test/MC/LoongArch/Macros/macros-call.s b/llvm/test/MC/LoongArch/Macros/macros-call.s new file mode 100644 index 0000000000000000000000000000000000000000..a648a397803817943485aecda02d5e90ffad9543 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-call.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s + +call36 sym_call +# CHECK: pcaddu18i $ra, %call36(sym_call) +# CHECK-NEXT: jirl $ra, $ra, 0 + +tail36 $t0, sym_tail +# CHECK: pcaddu18i $t0, %call36(sym_tail) +# CHECK-NEXT: jr $t0 diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s index 042cc93470a1e5b1b72b55814e0d78529bc0bbe1..bec71e103893331e0c3392133442f25fa986064a 100644 --- a/llvm/test/MC/LoongArch/Relocations/relocations.s +++ b/llvm/test/MC/LoongArch/Relocations/relocations.s @@ -218,3 +218,8 @@ lu12i.w $t1, %gd_hi20(foo) # RELOC: R_LARCH_TLS_GD_HI20 foo 0x0 # INSTR: lu12i.w $t1, %gd_hi20(foo) # FIXUP: fixup A - offset: 0, value: %gd_hi20(foo), kind: FK_NONE + +pcaddu18i $t1, %call36(foo) +# RELOC: R_LARCH_CALL36 foo 0x0 +# INSTR: pcaddu18i $t1, %call36(foo) +# FIXUP: fixup A - offset: 0, value: %call36(foo), kind: FK_NONE diff --git a/llvm/test/MC/LoongArch/lasx/frecip.s b/llvm/test/MC/LoongArch/lasx/frecip.s index 1bb3ce02fb9c056f3e0621cb7f418edab2403ab1..e95b03a96ebaf4d921fd8f1bc2f25af879930209 100644 --- a/llvm/test/MC/LoongArch/lasx/frecip.s +++ b/llvm/test/MC/LoongArch/lasx/frecip.s @@ -10,3 +10,11 @@ xvfrecip.s $xr3, $xr16 xvfrecip.d $xr17, $xr24 # CHECK-INST: xvfrecip.d $xr17, $xr24 # CHECK-ENCODING: encoding: [0x11,0xfb,0x9c,0x76] + +xvfrecipe.s $xr3, $xr16 +# CHECK-INST: xvfrecipe.s $xr3, $xr16 +# CHECK-ENCODING: encoding: [0x03,0x16,0x9d,0x76] + +xvfrecipe.d $xr17, $xr24 +# CHECK-INST: xvfrecipe.d $xr17, $xr24 +# CHECK-ENCODING: encoding: [0x11,0x1b,0x9d,0x76] diff --git 
a/llvm/test/MC/LoongArch/lasx/frsqrt.s b/llvm/test/MC/LoongArch/lasx/frsqrt.s index af96e10832dfb13206953a78f7deec2f0c3f4db9..d1048f9ff8f0ef4fa0dfe6f4b6489489348ea394 100644 --- a/llvm/test/MC/LoongArch/lasx/frsqrt.s +++ b/llvm/test/MC/LoongArch/lasx/frsqrt.s @@ -10,3 +10,11 @@ xvfrsqrt.s $xr31, $xr25 xvfrsqrt.d $xr14, $xr22 # CHECK-INST: xvfrsqrt.d $xr14, $xr22 # CHECK-ENCODING: encoding: [0xce,0x0a,0x9d,0x76] + +xvfrsqrte.s $xr31, $xr25 +# CHECK-INST: xvfrsqrte.s $xr31, $xr25 +# CHECK-ENCODING: encoding: [0x3f,0x27,0x9d,0x76] + +xvfrsqrte.d $xr14, $xr22 +# CHECK-INST: xvfrsqrte.d $xr14, $xr22 +# CHECK-ENCODING: encoding: [0xce,0x2a,0x9d,0x76] diff --git a/llvm/test/MC/LoongArch/lsx/frecip.s b/llvm/test/MC/LoongArch/lsx/frecip.s index d8c8278d16675e3dbb6d3e271cfa695ad2da5d1f..cd6d925e1470c39bda67f52b76f6d0cde0526801 100644 --- a/llvm/test/MC/LoongArch/lsx/frecip.s +++ b/llvm/test/MC/LoongArch/lsx/frecip.s @@ -10,3 +10,11 @@ vfrecip.s $vr29, $vr14 vfrecip.d $vr24, $vr9 # CHECK-INST: vfrecip.d $vr24, $vr9 # CHECK-ENCODING: encoding: [0x38,0xf9,0x9c,0x72] + +vfrecipe.s $vr29, $vr14 +# CHECK-INST: vfrecipe.s $vr29, $vr14 +# CHECK-ENCODING: encoding: [0xdd,0x15,0x9d,0x72] + +vfrecipe.d $vr24, $vr9 +# CHECK-INST: vfrecipe.d $vr24, $vr9 +# CHECK-ENCODING: encoding: [0x38,0x19,0x9d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/frsqrt.s b/llvm/test/MC/LoongArch/lsx/frsqrt.s index 68b0cc091b8ae254fc929b61647a2694bfe445f5..d8b9fc3d0684544ff626c8914e06024db7f09ef8 100644 --- a/llvm/test/MC/LoongArch/lsx/frsqrt.s +++ b/llvm/test/MC/LoongArch/lsx/frsqrt.s @@ -10,3 +10,11 @@ vfrsqrt.s $vr19, $vr30 vfrsqrt.d $vr1, $vr0 # CHECK-INST: vfrsqrt.d $vr1, $vr0 # CHECK-ENCODING: encoding: [0x01,0x08,0x9d,0x72] + +vfrsqrte.s $vr19, $vr30 +# CHECK-INST: vfrsqrte.s $vr19, $vr30 +# CHECK-ENCODING: encoding: [0xd3,0x27,0x9d,0x72] + +vfrsqrte.d $vr1, $vr0 +# CHECK-INST: vfrsqrte.d $vr1, $vr0 +# CHECK-ENCODING: encoding: [0x01,0x28,0x9d,0x72] diff --git 
a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index a8ac2411dd82166c805a79f3f03dd318f999777a..6ab300859f9d539f1eb6185cc4f287758105f972 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx -S | FileCheck %s ;; This is a collection of tests whose only purpose is to show changes in the ;; default configuration. Please keep these tests minimal - if you're testing diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test index e32dc893fa7985d41986aab7c874c25117a87323..88ff7fa405ed95f2aa23507cfd5992f44fce3dc3 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test @@ -102,6 +102,7 @@ # CHECK: Type: R_LARCH_ADD_ULEB128 (107) # CHECK: Type: R_LARCH_SUB_ULEB128 (108) # CHECK: Type: R_LARCH_64_PCREL (109) +# CHECK: Type: R_LARCH_CALL36 (110) --- !ELF FileHeader: @@ -211,3 +212,4 @@ Sections: - Type: R_LARCH_ADD_ULEB128 - Type: R_LARCH_SUB_ULEB128 - Type: R_LARCH_64_PCREL + - Type: R_LARCH_CALL36 diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp index 50b1df124a4a58c862a0cc5de6b5568bf0a98244..ed851dde4c00add5bf40bbd56e246973ac8c4132 100644 --- a/llvm/unittests/Object/ELFTest.cpp +++ b/llvm/unittests/Object/ELFTest.cpp @@ -251,6 +251,8 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); EXPECT_EQ("R_LARCH_64_PCREL", getELFRelocationTypeName(EM_LOONGARCH, 
R_LARCH_64_PCREL)); + EXPECT_EQ("R_LARCH_CALL36", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36)); } TEST(ELFTest, getELFRelativeRelocationType) {