diff --git a/build.sh b/build.sh index 6569b933b006e3df36fbf8a80c545cb60ebeef63..5c47c21e440e8402a9874a5606c45ae8e9974ed7 100755 --- a/build.sh +++ b/build.sh @@ -10,7 +10,7 @@ enable_autotuner="1" buildtype=RelWithDebInfo backends="all" build_for_openeuler="0" -enabled_projects="clang;lld;compiler-rt;openmp;clang-tools-extra" +enabled_projects="clang;lld;openmp;clang-tools-extra" embedded_toolchain="0" split_dwarf=on use_ccache="0" @@ -322,7 +322,7 @@ fi if [ $embedded_toolchain == "1" ]; then echo "Build for embedded cross tool chain" - enabled_projects="clang;lld;compiler-rt;" + enabled_projects="clang;lld;" CMAKE_OPTIONS="$CMAKE_OPTIONS \ -DLLVM_BUILD_FOR_EMBEDDED=ON" fi @@ -364,14 +364,14 @@ mkdir -p "$build_prefix" && cd "$build_prefix" cmake $CMAKE_OPTIONS \ -DCOMPILER_RT_BUILD_SANITIZERS=on \ -DLLVM_ENABLE_PROJECTS=$enabled_projects \ - -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ + -DLLVM_ENABLE_RUNTIMES="compiler-rt;libunwind" \ -DLLVM_USE_LINKER=gold \ -DLLVM_LIT_ARGS="-sv -j$threads" \ -DLLVM_USE_SPLIT_DWARF=$split_dwarf \ -DCMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO="-Wl,--gdb-index -Wl,--compress-debug-sections=zlib" \ -DCMAKE_EXE_LINKER_FLAGS_DEBUG="-Wl,--gdb-index -Wl,--compress-debug-sections=zlib" \ -DBUILD_SHARED_LIBS=OFF \ - -DLLVM_ENABLE_LIBCXX=OFF \ + -DLLVM_STATIC_LINK_CXX_STDLIB=ON \ -DLLVM_ENABLE_ZLIB=ON \ -DLLVM_BUILD_RUNTIME=ON \ -DLLVM_INCLUDE_TOOLS=ON \ @@ -409,6 +409,39 @@ if [ $do_install == "1" ]; then make -j$threads $verbose $install fi +# build libcxx/libcxxabi with the just-built clang/clang++ +c_compiler="$install_prefix/bin/clang" +cxx_compiler="$install_prefix/bin/clang++" +if pushd runtimes > /dev/null 2>&1; then + if [ ! -f "$build_prefix"/projects/libcxx/CMakeCache.txt ]; then + mkdir -p "$build_prefix/projects/libcxx" && cd "$build_prefix/projects/libcxx" + cmake -Wno-dev \ + -DCMAKE_BUILD_TYPE=$buildtype \ + -DCMAKE_INSTALL_PREFIX="$install_prefix" \ + -DCMAKE_C_COMPILER="$c_compiler" \ + -DCMAKE_CXX_COMPILER="$cxx_compiler" \ + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \ + -DLLVM_LIT_ARGS="-sv -j$threads" \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_SKIP_RPATH=ON \ + -DLLVM_USE_LINKER=gold \ + -DLLVM_USE_SPLIT_DWARF=$split_dwarf \ + ../../../runtimes + else + cd "$build_prefix"/projects/libcxx + fi + install_libcxx=${install/\/strip/-stripped} + make -j$threads $verbose \ + ${install_libcxx/install/install-cxx} ${install_libcxx/install/install-cxxabi} ${install_libcxx/install/install-cxxabi-headers} + if [ -n "$unit_test" ]; then + make -j$threads $verbose ${unit_test/all/cxx} ${unit_test/all/cxxabi} + fi + popd > /dev/null 2>&1 +else + echo "$0: directory not found: runtimes" + exit 1 +fi + if [ -n "$unit_test" ]; then make -j$threads $verbose check-all fi diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 05dad41c07faf0cae49dfb7fb0459e130edfb9a3..4f81412bd6f16827ba167659bf45c60a2160d76b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -160,6 +160,9 @@ Resolutions to C++ Defect Reports ``-Wdeprecated-literal-operator`` for the latter, off by default for now. .. code-block:: c++ +- Attributes now expect unevaluated strings in attribute parameters that are string literals. + This is applied to both C++ standard attributes and other attributes supported by Clang. + This completes the implementation of `P2361R6 Unevaluated Strings <https://wg21.link/p2361r6>`_. // What follows is warned by -Wuser-defined-literals // albeit "ill-formed, no diagnostic required".
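A minimal illustration of the unevaluated-string rule described in the release note above (the declarations are hypothetical, not part of this patch):

.. code-block:: c++

  // Attribute string arguments are now parsed as unevaluated strings:
  // they must be plain string literals, with no encoding prefix.
  [[deprecated("use new_api() instead")]] void old_api();  // OK
  [[deprecated(L"wide reason")]] void old_api2();          // now rejected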
@@ -935,6 +938,11 @@ Arm and AArch64 Support and the selected processor lacks floating point registers. (`#55755 <https://github.com/llvm/llvm-project/issues/55755>`_) +- New AArch64 asm constraints have been added for r8-r11 (Uci) and r12-r15 (Ucj). + +Android Support +^^^^^^^^^^^^^^^ + - Clang builtin ``__arithmetic_fence`` and the command line option ``-fprotect-parens`` are now enabled for AArch64. diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 8d20d088bb63c48d02fa88fc46f7a5de2fa44ecb..80cfd50d91df49833cc504e50f43fee7ce3e4210 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3966,6 +3966,34 @@ public: /// because TrailingObjects cannot handle repeated types. struct ExceptionType { QualType Type; }; + /// The AArch64 SME ACLE (Arm C/C++ Language Extensions) defines a number + /// of function type attributes that can be set on function types, including + /// function pointers. + enum AArch64SMETypeAttributes : unsigned { + SME_NormalFunction = 0, + SME_PStateSMEnabledMask = 1 << 0, + SME_PStateSMCompatibleMask = 1 << 1, + + // Describes the value of the state using ArmStateValue. + SME_ZAShift = 2, + SME_ZAMask = 0b111 << SME_ZAShift, + + SME_AttributeMask = 0b111'111 // We only support a maximum of 6 bits because of + // the bitmask in FunctionTypeExtraBitfields. + }; + + enum ArmStateValue : unsigned { + ARM_None = 0, + ARM_Preserves = 1, + ARM_In = 2, + ARM_Out = 3, + ARM_InOut = 4, + }; + + static ArmStateValue getArmZAState(unsigned AttrBits) { + return (ArmStateValue)((AttrBits & SME_ZAMask) >> SME_ZAShift); + } + /// A simple holder for various uncommon bits which do not fit in /// FunctionTypeBitfields. Aligned to alignof(void *) to maintain the /// alignment of subsequent objects in TrailingObjects. @@ -3973,7 +4001,13 @@ public: /// The number of types in the exception specification. /// A whole unsigned is not needed here and according to /// [implimits] 8 bits would be enough here. - uint16_t NumExceptionType = 0; + unsigned NumExceptionType : 10; + + /// Any AArch64 SME ACLE type attributes that need to be propagated + /// on declarations and function pointers. + unsigned AArch64SMEAttributes : 6; + FunctionTypeExtraBitfields() + : NumExceptionType(0), AArch64SMEAttributes(SME_NormalFunction) {} }; protected: @@ -4152,18 +4186,22 @@ public: /// the various bits of extra information about a function prototype.
struct ExtProtoInfo { FunctionType::ExtInfo ExtInfo; - bool Variadic : 1; - bool HasTrailingReturn : 1; + unsigned Variadic : 1; + unsigned HasTrailingReturn : 1; + unsigned AArch64SMEAttributes : 6; Qualifiers TypeQuals; RefQualifierKind RefQualifier = RQ_None; ExceptionSpecInfo ExceptionSpec; const ExtParameterInfo *ExtParameterInfos = nullptr; SourceLocation EllipsisLoc; - ExtProtoInfo() : Variadic(false), HasTrailingReturn(false) {} + ExtProtoInfo() + : Variadic(false), HasTrailingReturn(false), + AArch64SMEAttributes(SME_NormalFunction) {} ExtProtoInfo(CallingConv CC) - : ExtInfo(CC), Variadic(false), HasTrailingReturn(false) {} + : ExtInfo(CC), Variadic(false), HasTrailingReturn(false), + AArch64SMEAttributes(SME_NormalFunction) {} ExtProtoInfo withExceptionSpec(const ExceptionSpecInfo &ESI) { ExtProtoInfo Result(*this); @@ -4172,7 +4210,15 @@ public: } bool requiresFunctionProtoTypeExtraBitfields() const { - return ExceptionSpec.Type == EST_Dynamic; + return ExceptionSpec.Type == EST_Dynamic || + AArch64SMEAttributes != SME_NormalFunction; + } + + void setArmSMEAttribute(AArch64SMETypeAttributes Kind, bool Enable = true) { + if (Enable) + AArch64SMEAttributes |= Kind; + else + AArch64SMEAttributes &= ~Kind; } }; @@ -4299,6 +4345,7 @@ public: EPI.TypeQuals = getMethodQuals(); EPI.RefQualifier = getRefQualifier(); EPI.ExtParameterInfos = getExtParameterInfosOrNull(); + EPI.AArch64SMEAttributes = getAArch64SMEAttributes(); return EPI; } @@ -4480,6 +4527,15 @@ public: return getTrailingObjects<ExtParameterInfo>(); + /// Return a bitmask describing the SME attributes on the function type, see + /// AArch64SMETypeAttributes for their values. + unsigned getAArch64SMEAttributes() const { + if (!hasExtraBitfields()) + return SME_NormalFunction; + return getTrailingObjects<FunctionTypeExtraBitfields>() + ->AArch64SMEAttributes; + } + ExtParameterInfo getExtParameterInfo(unsigned I) const { assert(I < getNumParams() && "parameter index out of range"); if (hasExtParameterInfos()) diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 3cc826c1463a9fc241c041eb3c358accefc3c356..682c869b0c5847936a02189aa36a1e6b4ebb8323 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -323,6 +323,9 @@ let Class = FunctionProtoType in { ? node->getExtParameterInfos() : llvm::ArrayRef<FunctionProtoType::ExtParameterInfo>() }]; } + def : Property<"AArch64SMEAttributes", UInt32> { + let Read = [{ node->getAArch64SMEAttributes() }]; + } def : Creator<[{ auto extInfo = FunctionType::ExtInfo(noReturn, hasRegParm, regParm, @@ -338,6 +341,7 @@ let Class = FunctionProtoType in { epi.ExceptionSpec = exceptionSpecifier; epi.ExtParameterInfos = extParameterInfo.empty() ?
nullptr : extParameterInfo.data(); + epi.AArch64SMEAttributes = AArch64SMEAttributes; return ctx.getFunctionType(returnType, parameters, epi); }]>; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d5204b28696672261b253c2d01dc75db8d768b51..ad0ad208a7901ec56f63e6028f632203f3a2ba7d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -415,7 +415,7 @@ class TargetArch<list<string> arches> : TargetSpec { let Arches = arches; } def TargetARM : TargetArch<["arm", "thumb", "armeb", "thumbeb"]>; -def TargetAArch64 : TargetArch<["aarch64"]>; +def TargetAArch64 : TargetArch<["aarch64", "aarch64_be", "aarch64_32"]>; def TargetAnyArm : TargetArch<!listconcat(TargetARM.Arches, TargetAArch64.Arches)>; def TargetAVR : TargetArch<["avr"]>; def TargetBPF : TargetArch<["bpfel", "bpfeb"]>; @@ -2435,9 +2435,64 @@ def AArch64SVEPcs: DeclOrTypeAttr { def ArmStreaming : TypeAttr, TargetSpecificAttr<TargetAArch64> { let Spellings = [RegularKeyword<"__arm_streaming">]; - let Documentation = [ArmStreamingDocs]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeStreamingDocs]; +} + +def ArmStreamingCompatible : TypeAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_streaming_compatible">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeStreamingCompatibleDocs]; +} + +def ArmNew : InheritableAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_new">]; + let Args = [VariadicStringArgument<"NewArgs">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [ArmNewDocs]; + + let AdditionalMembers = [{ + bool isNewZA() const { + return llvm::is_contained(newArgs(), "za"); + } + }]; +} + +def ArmIn : TypeAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_in">]; + let Args = [VariadicStringArgument<"InArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmInDocs]; } +def ArmOut : TypeAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_out">]; + let Args = [VariadicStringArgument<"OutArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmOutDocs]; +} + +def ArmInOut : TypeAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_inout">]; + let Args = [VariadicStringArgument<"InOutArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmInOutDocs]; +} + +def ArmPreserves : TypeAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_preserves">]; + let Args = [VariadicStringArgument<"PreserveArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmPreservesDocs]; +} + +def ArmLocallyStreaming : InheritableAttr, TargetSpecificAttr<TargetAArch64> { + let Spellings = [RegularKeyword<"__arm_locally_streaming">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [ArmSmeLocallyStreamingDocs]; +} + + def Pure : InheritableAttr { let Spellings = [GCC<"pure">]; let Documentation = [Undocumented]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2c950231255d7ffa7a1b8d678adbe7b51aa1afa6..395982c45ecfa0dfd173582352afe7e0497decbb 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6578,38 +6578,195 @@ Requirements on Development Tools - Engineering Specification Documentation }]; } -def ArmStreamingDocs : Documentation { - let Category = DocCatType; +def DocCatArmSmeAttributes :
DocumentationCategory<"AArch64 SME Attributes"> { let Content = [{ -.. Note:: This attribute has not been implemented yet, but once it is - implemented, it will behave as described below. +Clang supports a number of AArch64-specific attributes to manage state +added by the Scalable Matrix Extension (SME). This state includes the +runtime mode that the processor is in (e.g. non-streaming or streaming) +as well as the state of the ``ZA`` Matrix Storage. -The ``__arm_streaming`` keyword is only available on AArch64 targets. -It applies to function types and specifies that the function has a -"streaming interface". This means that: +The attributes come in the form of type- and declaration attributes: -* the function requires the Scalable Matrix Extension (SME) +* The SME declaration attributes can appear anywhere that a standard + ``[[...]]`` declaration attribute can appear. -* the function must be entered in streaming mode (that is, with PSTATE.SM - set to 1) +* The SME type attributes apply only to prototyped functions and can appear + anywhere that a standard ``[[...]]`` type attribute can appear. The SME + type attributes do not apply to functions having a K&R-style + unprototyped function type. -* the function must return in streaming mode +See `Arm C Language Extensions `_ +for more details about the features related to the SME extension. See `Procedure Call Standard for the Arm® 64-bit Architecture (AArch64) `_ for more details about -streaming-interface functions. +streaming-interface functions and shared/private-ZA interface functions. + }]; +} + +def ArmSmeStreamingDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_streaming`` keyword applies to prototyped function types and specifies +that the function has a "streaming interface". This means that: + +* the function requires that the processor implements the Scalable Matrix + Extension (SME). + +* the function must be entered in streaming mode (that is, with PSTATE.SM + set to 1) + +* the function must return in streaming mode Clang manages PSTATE.SM automatically; it is not the source code's -responsibility to do this. For example, if a normal non-streaming +responsibility to do this. For example, if a non-streaming function calls an ``__arm_streaming`` function, Clang generates code that switches into streaming mode before calling the function and switches back to non-streaming mode on return. + }]; +} + +def ArmSmeStreamingCompatibleDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_streaming_compatible`` keyword applies to prototyped function types and +specifies that the function has a “streaming compatible interface”. This +means that: -``__arm_streaming`` can appear anywhere that a standard ``[[...]]`` type -attribute can appear. +* the function may be entered in either non-streaming mode (PSTATE.SM=0) or + in streaming mode (PSTATE.SM=1). -See `Arm C Language Extensions `_ -for more details about this extension, and for other related SME features. +* the function must return in the same mode as it was entered. + +* the code executed in the function is compatible with either mode. + +Clang manages PSTATE.SM automatically; it is not the source code's +responsibility to do this. Clang will ensure that the generated code in +streaming-compatible functions is valid in either mode (PSTATE.SM=0 or +PSTATE.SM=1). 
For example, if an ``__arm_streaming_compatible`` function calls a +non-streaming function, Clang generates code to temporarily switch out of streaming +mode before calling the function and switch back to streaming mode on return if +``PSTATE.SM`` is ``1`` on entry to the caller. If ``PSTATE.SM`` is ``0`` on +entry to the ``__arm_streaming_compatible`` function, the call will be executed +without changing modes. + }]; +} + +def ArmInDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_in`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_in``, the +function takes the state S as input and returns with the state S unchanged. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} + +def ArmOutDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_out`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_out``, +the function ignores the incoming state for S and returns new state for S. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} + +def ArmInOutDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_inout`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_inout``, +the function takes the state S as input and returns new state for S. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} + +def ArmPreservesDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_preserves`` keyword applies to prototyped function types and +specifies that the function does not read a given state S and returns +with state S unchanged. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} + +def ArmSmeLocallyStreamingDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_locally_streaming`` keyword applies to function declarations +and specifies that all the statements in the function are executed in +streaming mode. This means that: + +* the function requires that the target processor implements the Scalable Matrix + Extension (SME). + +* the program automatically puts the machine into streaming mode before + executing the statements and automatically restores the previous mode + afterwards.
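A minimal sketch, with an illustrative function name:

.. code-block:: c++

  // The body executes with PSTATE.SM=1; the caller's mode is restored
  // when the function returns.
  __arm_locally_streaming void scale(float *data, int n) {
    for (int i = 0; i != n; ++i)
      data[i] *= 2.0f;
  }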
+ +Clang manages PSTATE.SM automatically; it is not the source code's +responsibility to do this. For example, Clang will emit code to enable +streaming mode at the start of the function, and disable streaming mode +at the end of the function. + }]; +} + +def ArmNewDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_new`` keyword applies to function declarations and specifies +that the function will create a new scope for state S. + +The attribute takes string arguments to instruct the compiler for which state +to create a new scope. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +For state ``"za"``, this means that: + +* the function requires that the target processor implements the Scalable Matrix + Extension (SME). + +* the function will commit any lazily saved ZA data. + +* the function will create a new ZA context and enable PSTATE.ZA. + +* the function will disable PSTATE.ZA (by setting it to 0) before returning. + +For ``__arm_new("za")`` functions, Clang will set up the ZA context automatically +on entry to the function and disable it before returning. For example, if ZA is +in a dormant state, Clang will generate the code to commit a lazy-save and set up +a new ZA state before executing user code. }]; } diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 6396c0dc6ef0299a530210f0dd14f2d791751245..99846f20482cbba9c05fe360ece460efe42df4e4 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -235,6 +235,19 @@ protected: return SpellingIndex != SpellingNotCalculated; } }; + +inline bool doesKeywordAttributeTakeArgs(tok::TokenKind Kind) { + switch (Kind) { + default: + return false; +#define KEYWORD_ATTRIBUTE(NAME, HASARG, ...)
\ + case tok::kw_##NAME: \ + return HASARG; +#include "clang/Basic/RegularKeywordAttrInfo.inc" +#undef KEYWORD_ATTRIBUTE + } +} + } // namespace clang #endif // LLVM_CLANG_BASIC_ATTRIBUTECOMMONINFO_H diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index eaae6c9ad84686864fae49d48f56e2ff53b784d3..33eb1dc2f6d8e2c9e4623df419cee627adeec6cf 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -68,6 +68,9 @@ TARGET_BUILTIN(__builtin_arm_ldg, "v*v*", "t", "mte") TARGET_BUILTIN(__builtin_arm_stg, "vv*", "t", "mte") TARGET_BUILTIN(__builtin_arm_subp, "Uiv*v*", "t", "mte") +// SME state function +BUILTIN(__builtin_arm_get_sme_state, "vULi*ULi*", "n") + // Memory Operations TARGET_BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "", "mte,mops") diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 691d14f013cfed3f6b473e32ca09ae71582237f0..096a85af4abdcb3887a6802a225b99b1ceb3eb69 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -48,10 +48,10 @@ clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list SOURCE Attr.td TARGET ClangAttrSubjectMatchRuleList) -clang_tablegen(AttrTokenKinds.inc -gen-clang-attr-token-kinds +clang_tablegen(RegularKeywordAttrInfo.inc -gen-clang-regular-keyword-attr-info -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE Attr.td - TARGET ClangAttrTokenKinds + TARGET ClangRegularKeywordAttrInfo ) clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl @@ -96,6 +96,9 @@ clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks ${CLANG_BASIC_OPTIONS} SOURCE arm_sve.td TARGET ClangARMSveSemaRangeChecks) +clang_tablegen(arm_sve_streaming_attrs.inc -gen-arm-sve-streaming-attrs + SOURCE arm_sve.td + TARGET ClangARMSveStreamingAttrs) clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins SOURCE arm_sme.td TARGET ClangARMSmeBuiltins) @@ -105,6 +108,12 @@ clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks SOURCE arm_sme.td TARGET ClangARMSmeSemaRangeChecks) +clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs + SOURCE arm_sme.td + TARGET ClangARMSmeStreamingAttrs) +clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltinsZAState) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index d7cc3cff0797d0dcbe9b3534df38059d7d643795..d35445db231eda6fdc447439e8eba30f367de1d6 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -56,7 +56,9 @@ def err_expected_string_literal : Error<"expected string literal " "%select{in %1|for diagnostic message in static_assert|" "for optional message in 'availability' attribute|" "for %select{language name|source container name|USR}1 in " - "'external_source_symbol' attribute}0">; + "'external_source_symbol' attribute|" + "as argument of '%1' attribute}0">; + def err_invalid_string_udl : Error< "string literal with user-defined suffix cannot be used here">; def err_invalid_character_udl : Error< diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td 
b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 11022962ae9e3893c4c2f05101d35a8aca0a3e58..db2428caa5c8972992feea005aabffb3bc37cf08 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -273,6 +273,10 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">; def err_function_needs_feature : Error< "always_inline function %1 requires target feature '%2', but would " "be inlined into function %0 that is compiled without support for '%2'">; +def err_function_always_inline_attribute_mismatch : Error< + "always_inline function %1 and its caller %0 have mismatching %2 attributes">; +def err_function_always_inline_new_za : Error< + "always_inline function %0 has new za state">; def warn_avx_calling_convention : Warning<"AVX vector %select{return|argument}0 of type %1 without '%2' " diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 0e97620945af4841ab9a3720b5510b57d59a0119..e304ef6aec1cd33ea1b3878552d832f72b0d1ff0 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2035,6 +2035,10 @@ def err_different_return_type_for_overriding_virtual_function : Error< "than the function it overrides}1,2">; def note_overridden_virtual_function : Note< "overridden virtual function is here">; +def err_conflicting_overriding_attributes : Error< + "virtual function %0 has different attributes " + "%diff{($) than the function it overrides (which has $)|" + "than the function it overrides}1,2">; def err_conflicting_overriding_cc_attributes : Error< "virtual function %0 has different calling convention attributes " "%diff{($) than the function it overrides (which has calling convention $)|" @@ -3097,6 +3101,12 @@ def err_attribute_bad_sve_vector_size : Error< def err_attribute_arm_feature_sve_bits_unsupported : Error< "%0 is only supported when '-msve-vector-bits=' is specified with a " "value of 128, 256, 512, 1024 or 2048.">; +def warn_attribute_arm_sm_incompat_builtin : Warning< + "builtin call has undefined behaviour when called from a %0 function">, + InGroup>; +def warn_attribute_arm_za_builtin_no_za_state : Warning< + "builtin call is not valid when calling from a function without active ZA state">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< @@ -3621,6 +3631,22 @@ def err_attribute_vecreturn_only_vector_member : Error< "the vecreturn attribute can only be used on a class or structure with one member, which must be a vector">; def err_attribute_vecreturn_only_pod_record : Error< "the vecreturn attribute can only be used on a POD (plain old data) class or structure (i.e. 
no virtual functions)">; +def err_sme_attr_mismatch : Error< + "function declared %0 was previously declared %1, which has different SME function attributes">; +def err_sme_call_in_non_sme_target : Error< + "call to a streaming function requires 'sme'">; +def err_sme_za_call_no_za_state : Error< + "call to a shared ZA function requires the caller to have ZA state">; +def err_sme_definition_using_sm_in_non_sme_target : Error< + "function executed in streaming-SVE mode requires 'sme'">; +def err_sme_definition_using_za_in_non_sme_target : Error< + "function using ZA state requires 'sme'">; +def err_conflicting_attributes_arm_state : Error< + "conflicting attributes for state '%0'">; +def err_unknown_arm_state : Error< + "unknown state '%0'">; +def err_missing_arm_state : Error< + "missing state for %0">; def err_cconv_change : Error< "function declared '%0' here was previously declared " "%select{'%2'|without calling convention}1">; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index afd101b007b4265d1faf92af0d89f80042285c92..a540a3ee1bf0a2164b5f92aa9417d5c929d5d6f5 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -753,9 +753,9 @@ KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL) // Keywords defined by Attr.td. #ifndef KEYWORD_ATTRIBUTE -#define KEYWORD_ATTRIBUTE(X, EMPTY) KEYWORD(EMPTY ## X, KEYALL) +#define KEYWORD_ATTRIBUTE(X, HASARG, EMPTY) KEYWORD(EMPTY ## X, KEYALL) #endif -#include "clang/Basic/AttrTokenKinds.inc" +#include "clang/Basic/RegularKeywordAttrInfo.inc" // Clang-specific keywords enabled only in testing. TESTING_KEYWORD(__unknown_anytype , KEYALL) diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index ff117bd5afc56a4948b8aeefe66674a891e25f21..7529b922619ada3cd7993ca75bc943111ad66d1f 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -110,7 +110,7 @@ bool isPragmaAnnotation(TokenKind K); inline constexpr bool isRegularKeywordAttribute(TokenKind K) { return (false #define KEYWORD_ATTRIBUTE(X, ...) 
|| (K == tok::kw_##X) -#include "clang/Basic/AttrTokenKinds.inc" +#include "clang/Basic/RegularKeywordAttrInfo.inc" ); } diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b950f5cb8acc2090e0f90dc9ff1bf05ed69c53ba..dc998d1f5668828594d228588aaa32259dfdb45e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -20,120 +20,130 @@ include "arm_sve_sme_incl.td" multiclass ZALoad ch> { let TargetGuard = "sme" in { - def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimiPQ", t, - [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimPQ", t, + [IsLoad, IsOverloadNone, IsStreaming, IsInOutZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimiPQl", t, - [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimPQl", t, + [IsLoad, IsOverloadNone, IsStreaming, IsInOutZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimiPQ", t, - [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimPQ", t, + [IsLoad, IsOverloadNone, IsStreaming, IsInOutZA], MemEltTyDefault, i_prefix # "_vert", ch>; - def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimiPQl", t, - [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimPQl", t, + [IsLoad, IsOverloadNone, IsStreaming, IsInOutZA], MemEltTyDefault, i_prefix # "_vert", ch>; } } -defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>]>; +defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>]>; +defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>]>; +defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>]>; +defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>]>; -def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmiQ", "", - [IsOverloadNone, IsStreamingCompatible, IsSharedZA], - MemEltTyDefault, "aarch64_sme_ldr", - [ImmCheck<1, ImmCheck0_15>]>; +let TargetGuard = "sme" in { +def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQl", "", + [IsOverloadNone, IsStreamingCompatible, IsInOutZA], + MemEltTyDefault, "aarch64_sme_ldr">; + +def SVLDR_ZA : MInst<"svldr_za", "vmQ", "", + [IsOverloadNone, IsStreamingCompatible, IsInOutZA], + MemEltTyDefault, "aarch64_sme_ldr", []>; +} //////////////////////////////////////////////////////////////////////////////// // Stores multiclass ZAStore ch> { let TargetGuard = "sme" in { - def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimiP%", t, - [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimP%", t, + [IsStore, IsOverloadNone, IsStreaming, IsInZA], MemEltTyDefault, 
i_prefix # "_horiz", ch>; - def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimiP%l", t, - [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimP%l", t, + [IsStore, IsOverloadNone, IsStreaming, IsInZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimiP%", t, - [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimP%", t, + [IsStore, IsOverloadNone, IsStreaming, IsInZA], MemEltTyDefault, i_prefix # "_vert", ch>; - def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimiP%l", t, - [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimP%l", t, + [IsStore, IsOverloadNone, IsStreaming, IsInZA], MemEltTyDefault, i_prefix # "_vert", ch>; } } -defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>]>; +defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>]>; +defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>]>; +defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>]>; +defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>]>; -def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vmi%", "", - [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], - MemEltTyDefault, "aarch64_sme_str", - [ImmCheck<1, ImmCheck0_15>]>; +let TargetGuard = "sme" in { +def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%l", "", + [IsOverloadNone, IsStreamingCompatible, IsInZA], + MemEltTyDefault, "aarch64_sme_str">; + +def SVSTR_ZA : MInst<"svstr_za", "vm%", "", + [IsOverloadNone, IsStreamingCompatible, IsInZA], + MemEltTyDefault, "aarch64_sme_str", []>; +} //////////////////////////////////////////////////////////////////////////////// // Read horizontal/vertical ZA slices multiclass ZARead ch> { let TargetGuard = "sme" in { - def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPimi", t, + def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPim", t, MergeOp1, i_prefix # "_horiz", - [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>; + [IsReadZA, IsStreaming, IsInZA], ch>; - def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPimi", t, + def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPim", t, MergeOp1, i_prefix # "_vert", - [IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>; + [IsReadZA, IsStreaming, IsInZA], ch>; } } -defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>, ImmCheck<4, ImmCheck0_15>]>; -defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>; -defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; -defm 
SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; -defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0_0>]>; +defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>; +defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>]>; +defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>]>; +defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // Write horizontal/vertical ZA slices multiclass ZAWrite ch> { let TargetGuard = "sme" in { - def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimiPd", t, + def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimPd", t, MergeOp1, i_prefix # "_horiz", - [IsWriteZA, IsStreaming, IsSharedZA], ch>; + [IsWriteZA, IsStreaming, IsInOutZA], ch>; - def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimiPd", t, + def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimPd", t, MergeOp1, i_prefix # "_vert", - [IsWriteZA, IsStreaming, IsSharedZA], ch>; + [IsWriteZA, IsStreaming, IsInOutZA], ch>; } } -defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; +defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; +defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; +defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // SME - Zero let TargetGuard = "sme" in { def SVZERO_MASK_ZA : SInst<"svzero_mask_za", "vi", "", MergeNone, "aarch64_sme_zero", - [IsOverloadNone, IsStreamingCompatible, IsSharedZA], + [IsOverloadNone, IsStreamingCompatible, IsInOutZA], [ImmCheck<0, ImmCheck0_255>]>; def SVZERO_ZA : SInst<"svzero_za", "v", "", MergeNone, "aarch64_sme_zero", - [IsOverloadNone, IsStreamingCompatible, IsSharedZA]>; + [IsOverloadNone, IsStreamingCompatible, IsOutZA]>; } //////////////////////////////////////////////////////////////////////////////// @@ -143,7 +153,7 @@ multiclass ZACount { let TargetGuard = "sme" in { def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone, "aarch64_sme_" # n_suffix, - [IsOverloadNone, IsStreamingCompatible, IsPreservesZA]>; + [IsOverloadNone, IsStreamingCompatible]>; } } @@ -158,13 +168,13 @@ defm SVCNTSD : ZACount<"cntsd">; multiclass ZAAdd { let TargetGuard = "sme" in { def NAME # _ZA32: SInst<"sv" # 
n_suffix # "_za32[_{d}]", "viPPd", "iUi", MergeOp1, - "aarch64_sme_" # n_suffix, [IsStreaming, IsSharedZA], + "aarch64_sme_" # n_suffix, [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } let TargetGuard = "sme-i16i64" in { def NAME # _ZA64: SInst<"sv" # n_suffix # "_za64[_{d}]", "viPPd", "lUl", MergeOp1, - "aarch64_sme_" # n_suffix, [IsStreaming, IsSharedZA], + "aarch64_sme_" # n_suffix, [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_7>]>; } } @@ -180,7 +190,7 @@ multiclass ZAIntOuterProd { def NAME # _ZA32_B: SInst<"sv" # n_suffix2 # "_za32[_{d}]", "viPPdd", !cond(!eq(n_suffix1, "s") : "", true: "U") # "c", MergeOp1, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } @@ -188,7 +198,7 @@ multiclass ZAIntOuterProd { def NAME # _ZA64_H: SInst<"sv" # n_suffix2 # "_za64[_{d}]", "viPPdd", !cond(!eq(n_suffix1, "s") : "", true: "U") # "s", MergeOp1, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_7>]>; } } @@ -207,7 +217,7 @@ multiclass ZAIntOuterProdMixedSigns { "viPPd" # !cond(!eq(n_suffix1, "su") : "u", true: "x"), !cond(!eq(n_suffix1, "su") : "", true: "U") # "c", MergeOp1, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } @@ -216,7 +226,7 @@ multiclass ZAIntOuterProdMixedSigns { "viPPd" # !cond(!eq(n_suffix1, "su") : "u", true: "x"), !cond(!eq(n_suffix1, "su") : "", true: "U") # "s", MergeOp1, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_7>]>; } } @@ -233,27 +243,62 @@ multiclass ZAFPOuterProd { let TargetGuard = "sme" in { def NAME # _ZA32_B: SInst<"sv" # n_suffix # "_za32[_{d}]", "viPPdd", "h", MergeOp1, "aarch64_sme_" # n_suffix # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; def NAME # _ZA32_H: SInst<"sv" # n_suffix # "_za32[_{d}]", "viPPdd", "b", MergeOp1, "aarch64_sme_" # n_suffix # "_wide", - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; def NAME # _ZA32_S: SInst<"sv" # n_suffix # "_za32[_{d}]", "viPPdd", "f", MergeOp1, "aarch64_sme_" # n_suffix, - [IsStreaming, IsSharedZA], + [IsStreaming, IsInOutZA], [ImmCheck<0, ImmCheck0_3>]>; } let TargetGuard = "sme-f64f64" in { def NAME # _ZA64_D: SInst<"sv" # n_suffix # "_za64[_{d}]", "viPPdd", "d", MergeOp1, "aarch64_sme_" # n_suffix, - [IsStreaming, IsSharedZA], - [ImmCheck<0, ImmCheck0_3>]>; + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_7>]>; } } defm SVMOPA : ZAFPOuterProd<"mopa">; defm SVMOPS : ZAFPOuterProd<"mops">; + +//////////////////////////////////////////////////////////////////////////////// +// SME2 - ADD, SUB + +multiclass ZAAddSub { + let TargetGuard = "sme2" in { + def NAME # _WRITE_SINGLE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x2", "vm2d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _WRITE_SINGLE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x4", "vm4d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsInOutZA], []>; + + def NAME # _WRITE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x2", "vm22", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # 
_WRITE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x4", "vm44", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsInOutZA], []>; + + def NAME # _ZA32_VG1x2_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x2", "vm2", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x4", "vm4", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,sme-i16i64" in { + def NAME # _WRITE_SINGLE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x2", "vm2d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _WRITE_SINGLE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsInOutZA], []>; + + def NAME # _WRITE_ZA64_VG1x2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _WRITE_ZA64_VG1x4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsInOutZA], []>; + + def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,sme-f64f64" in { + def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; + def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; + } +} + +defm SVADD : ZAAddSub<"add">; +defm SVSUB : ZAAddSub<"sub">; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 894a0a1296b047332e61438b2e0c296c647b9316..ef013977ffdc344a7851375a7e1a1ed4cd02e31b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,27 +19,27 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt8, 
"aarch64_sve_ld1">; +def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; let TargetGuard = "sve,bf16" in { - def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; - def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; + def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; + def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; // Load one vector (vector base) def SVLD1_GATHER_BASES_U : MInst<"svld1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">; @@ -243,27 +243,27 @@ let TargetGuard = "sve,bf16" in { } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; // 
Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; let TargetGuard = "sve,bf16" in { - def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; - def SVLDNT1_VNUM_BF : MInst<"svldnt1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; + def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; + def SVLDNT1_VNUM_BF : MInst<"svldnt1_vnum[_{2}]", "dPcl", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; } // Load one quadword and replicate (scalar base) -def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; +def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { - def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq">; + def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [IsStreamingCompatible]>; } multiclass StructLoad { - def : SInst; + def : SInst; let TargetGuard = "sve,bf16" in { - def: SInst; + def: SInst; } } @@ -286,42 +286,42 @@ let TargetGuard = "sve,f64mm,bf16" in { } let TargetGuard = "sve,bf16" in { - def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; - def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>; - def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>; - def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone]>; - def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; - def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>; - def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>; - def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; - def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; - def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", 
[IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane_v2", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_3>]>; + def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane_v2", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; } //////////////////////////////////////////////////////////////////////////////// // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", 
"vPEld", "UsUiUl", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; let TargetGuard = "sve,bf16" in { - def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; - def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; + def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; + def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; } // Store one vector (vector base) @@ -394,9 +394,9 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">; multiclass StructStore { - def : SInst; + def : SInst; let TargetGuard = "sve,bf16" in { - def: SInst; + def: SInst; } } // Store N vectors into N-element structure (scalar base) @@ -410,30 +410,30 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">; defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; let TargetGuard = "sve,bf16" in { - def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; - def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; + def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; } //////////////////////////////////////////////////////////////////////////////// // Prefetches // Prefetch (Scalar base) -def SVPRFB : MInst<"svprfb", "vPQJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; -def SVPRFH : MInst<"svprfh", "vPQJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; -def SVPRFW : MInst<"svprfw", "vPQJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; -def SVPRFD : MInst<"svprfd", "vPQJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +def SVPRFB : MInst<"svprfb", "vPQJ", "c", [IsPrefetch, IsStreamingCompatible], 
MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH : MInst<"svprfh", "vPQJ", "s", [IsPrefetch, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW : MInst<"svprfw", "vPQJ", "i", [IsPrefetch, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD : MInst<"svprfd", "vPQJ", "l", [IsPrefetch, IsStreamingCompatible], MemEltTyInt64, "aarch64_sve_prf">; // Prefetch (Scalar base, VL displacement) -def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPQlJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; -def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; -def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; -def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPQlJ", "c", [IsPrefetch, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch, IsStreamingCompatible], MemEltTyInt64, "aarch64_sve_prf">; // Prefetch (Vector bases) def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; @@ -488,9 +488,9 @@ def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>; def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; multiclass svdup_base { - def NAME : SInst; + def NAME : SInst; let TargetGuard = "sve,bf16" in { - def _BF16: SInst; + def _BF16: SInst; } } @@ -499,14 +499,14 @@ defm SVDUP_M : svdup_base<"svdup[_n]_{d}", "ddPs", MergeOp1, "aarch64_sve_du defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs", MergeAnyExp, "aarch64_sve_dup">; defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs", MergeZeroExp, "aarch64_sve_dup">; -def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">; +def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index", [IsStreamingCompatible]>; // Integer arithmetic -multiclass SInstZPZ flags=[]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; +multiclass SInstZPZ { + def _M : SInst; + def _X : SInst; + def _Z : SInst; } defm SVABS : SInstZPZ<"svabs", "csil", "aarch64_sve_abs">; @@ -515,13 +515,13 @@ defm SVNEG : SInstZPZ<"svneg", "csil", "aarch64_sve_neg">; //------------------------------------------------------------------------------ multiclass SInstZPZZ flags=[]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; + def _M : SInst; + def _X : SInst; + def _Z : SInst; - def _N_M : SInst; - def _N_X : SInst; - def _N_Z : SInst; + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; } defm SVABD_S : SInstZPZZ<"svabd", "csil", "aarch64_sve_sabd", "aarch64_sve_sabd_u">; @@ -553,26 +553,26 @@ multiclass SInstZPZZZ; } -defm SVMAD : SInstZPZZZ<"svmad", "csilUcUsUiUl", "aarch64_sve_mad", "aarch64_sve_mla_u", [ReverseMergeAnyAccOp]>; -defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla", "aarch64_sve_mla_u">; -defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls", "aarch64_sve_mls_u">; -defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb", "aarch64_sve_mls_u", [ReverseMergeAnyAccOp]>; +defm SVMAD : SInstZPZZZ<"svmad", "csilUcUsUiUl", 
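// The _M/_X/_Z records expand to the ACLE _m/_x/_z builtin variants: _m takes
// inactive lanes from an extra "inactive" operand, _z zeroes them, and _x
// leaves them unspecified (cheapest when the caller does not care). A short
// hedged illustration:
//
//   svint32_t abs_m(svbool_t pg, svint32_t inactive, svint32_t x)
//       __arm_streaming_compatible {
//     return svabs_s32_m(inactive, pg, x);  // inactive lanes from 'inactive'
//   }
//   svint32_t abs_z(svbool_t pg, svint32_t x) __arm_streaming_compatible {
//     return svabs_s32_z(pg, x);            // inactive lanes zeroed
//   }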
"aarch64_sve_mad", "aarch64_sve_mla_u", [ReverseMergeAnyAccOp, IsStreamingCompatible]>; +defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla", "aarch64_sve_mla_u", [IsStreamingCompatible]>; +defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls", "aarch64_sve_mls_u", [IsStreamingCompatible]>; +defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb", "aarch64_sve_mls_u", [ReverseMergeAnyAccOp, IsStreamingCompatible]>; //------------------------------------------------------------------------------ -def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot">; -def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot">; -def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x">; -def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">; -def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x">; -def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; +def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot", [IsStreamingCompatible]>; +def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot", [IsStreamingCompatible]>; +def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x", [IsStreamingCompatible]>; +def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [IsStreamingCompatible]>; +def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x", [IsStreamingCompatible]>; +def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [IsStreamingCompatible]>; -def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot">; -def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot">; -def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x">; -def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">; -def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x">; -def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; +def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot", [IsStreamingCompatible]>; +def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot", [IsStreamingCompatible]>; +def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x", [IsStreamingCompatible]>; +def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [IsStreamingCompatible]>; +def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x", [IsStreamingCompatible]>; +def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [IsStreamingCompatible]>; def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; @@ -592,107 +592,107 @@ defm SVNOT : SInstZPZ<"svnot", "csilUcUsUiUl", "aarch64_sve_not">; // Shifts multiclass SInst_SHIFT { - def _M : SInst; - def _X : SInst; - def _Z : SInst; + def _M : SInst; + def _X : SInst; + def _Z : SInst; - def _N_M : SInst; - def _N_X : SInst; - def 
_N_Z : SInst; + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; - def _WIDE_M : SInst; - def _WIDE_X : SInst; - def _WIDE_Z : SInst; + def _WIDE_M : SInst; + def _WIDE_X : SInst; + def _WIDE_Z : SInst; - def _WIDE_N_M : SInst; - def _WIDE_N_X : SInst; - def _WIDE_N_Z : SInst; + def _WIDE_N_M : SInst; + def _WIDE_N_X : SInst; + def _WIDE_N_Z : SInst; } defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">; defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">; defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; -def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">; +def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { - def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">; + def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // Integer reductions -def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">; -def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">; -def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">; -def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">; -def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_smaxv">; -def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv">; -def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv">; -def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv">; -def SVORV : SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv">; +def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv", [IsStreamingCompatible]>; +def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv", [IsStreamingCompatible]>; +def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv", [IsStreamingCompatible]>; +def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv", [IsStreamingCompatible]>; +def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_smaxv", [IsStreamingCompatible]>; +def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv", [IsStreamingCompatible]>; +def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv", 
[IsStreamingCompatible]>; +def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv", [IsStreamingCompatible]>; +def SVORV : SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Integer comparisons -def SVCMPEQ : SInst<"svcmpeq[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">; -def SVCMPNE : SInst<"svcmpne[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">; -def SVCMPGE : SInst<"svcmpge[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge">; -def SVCMPGT : SInst<"svcmpgt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt">; -def SVCMPLE : SInst<"svcmple[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>; -def SVCMPLT : SInst<"svcmplt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>; -def SVCMPHI : SInst<"svcmpgt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">; -def SVCMPHS : SInst<"svcmpge[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">; -def SVCMPLO : SInst<"svcmplt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>; -def SVCMPLS : SInst<"svcmple[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>; - -def SVCMPEQ_N : SInst<"svcmpeq[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">; -def SVCMPNE_N : SInst<"svcmpne[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">; -def SVCMPGE_N : SInst<"svcmpge[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge">; -def SVCMPGT_N : SInst<"svcmpgt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt">; -def SVCMPLE_N : SInst<"svcmple[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>; -def SVCMPLT_N : SInst<"svcmplt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>; -def SVCMPHS_N : SInst<"svcmpge[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">; -def SVCMPHI_N : SInst<"svcmpgt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">; -def SVCMPLS_N : SInst<"svcmple[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>; -def SVCMPLO_N : SInst<"svcmplt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>; - -def SVCMPEQ_WIDE : SInst<"svcmpeq_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpeq_wide">; -def SVCMPNE_WIDE : SInst<"svcmpne_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpne_wide">; -def SVCMPGE_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpge_wide">; -def SVCMPGT_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpgt_wide">; -def SVCMPLE_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmple_wide">; -def SVCMPLT_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmplt_wide">; -def SVCMPHI_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">; -def SVCMPHS_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">; -def SVCMPLO_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">; -def SVCMPLS_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">; - -def SVCMPEQ_WIDE_N : SInst<"svcmpeq_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpeq_wide">; -def SVCMPNE_WIDE_N : SInst<"svcmpne_wide[_n_{d}]", "PPdj", "csi", 
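// Note how SVCMPLE/SVCMPLT (and the unsigned LO/LS forms) reuse the "greater"
// intrinsics together with [ReverseCompare]: there is no separate LE/LT
// intrinsic, so the emitter swaps the operands. What the user writes is
// unchanged; a hedged example:
//
//   svbool_t le(svbool_t pg, svint32_t a, svint32_t b)
//       __arm_streaming_compatible {
//     return svcmple_s32(pg, a, b);  // emitted as svcmpge with a/b swapped
//   }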
MergeNone, "aarch64_sve_cmpne_wide">; -def SVCMPGE_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpge_wide">; -def SVCMPGT_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpgt_wide">; -def SVCMPLE_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmple_wide">; -def SVCMPLT_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmplt_wide">; -def SVCMPHS_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">; -def SVCMPHI_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">; -def SVCMPLO_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">; -def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">; +def SVCMPEQ : SInst<"svcmpeq[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq", [IsStreamingCompatible]>; +def SVCMPNE : SInst<"svcmpne[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne", [IsStreamingCompatible]>; +def SVCMPGE : SInst<"svcmpge[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [IsStreamingCompatible]>; +def SVCMPGT : SInst<"svcmpgt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [IsStreamingCompatible]>; +def SVCMPLE : SInst<"svcmple[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT : SInst<"svcmplt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPHI : SInst<"svcmpgt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [IsStreamingCompatible]>; +def SVCMPHS : SInst<"svcmpge[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [IsStreamingCompatible]>; +def SVCMPLO : SInst<"svcmplt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLS : SInst<"svcmple[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare, IsStreamingCompatible]>; + +def SVCMPEQ_N : SInst<"svcmpeq[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq", [IsStreamingCompatible]>; +def SVCMPNE_N : SInst<"svcmpne[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne", [IsStreamingCompatible]>; +def SVCMPGE_N : SInst<"svcmpge[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [IsStreamingCompatible]>; +def SVCMPGT_N : SInst<"svcmpgt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [IsStreamingCompatible]>; +def SVCMPLE_N : SInst<"svcmple[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_N : SInst<"svcmplt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPHS_N : SInst<"svcmpge[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [IsStreamingCompatible]>; +def SVCMPHI_N : SInst<"svcmpgt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [IsStreamingCompatible]>; +def SVCMPLS_N : SInst<"svcmple[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLO_N : SInst<"svcmplt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare, IsStreamingCompatible]>; + +def SVCMPEQ_WIDE : SInst<"svcmpeq_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpeq_wide", [IsStreamingCompatible]>; +def SVCMPNE_WIDE : SInst<"svcmpne_wide[_{d}]", "PPdw", "csi", 
MergeNone, "aarch64_sve_cmpne_wide", [IsStreamingCompatible]>; +def SVCMPGE_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpge_wide", [IsStreamingCompatible]>; +def SVCMPGT_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpgt_wide", [IsStreamingCompatible]>; +def SVCMPLE_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmple_wide", [IsStreamingCompatible]>; +def SVCMPLT_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmplt_wide", [IsStreamingCompatible]>; +def SVCMPHI_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide", [IsStreamingCompatible]>; +def SVCMPHS_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide", [IsStreamingCompatible]>; +def SVCMPLO_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide", [IsStreamingCompatible]>; +def SVCMPLS_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide", [IsStreamingCompatible]>; + +def SVCMPEQ_WIDE_N : SInst<"svcmpeq_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpeq_wide", [IsStreamingCompatible]>; +def SVCMPNE_WIDE_N : SInst<"svcmpne_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpne_wide", [IsStreamingCompatible]>; +def SVCMPGE_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpge_wide", [IsStreamingCompatible]>; +def SVCMPGT_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpgt_wide", [IsStreamingCompatible]>; +def SVCMPLE_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmple_wide", [IsStreamingCompatible]>; +def SVCMPLT_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmplt_wide", [IsStreamingCompatible]>; +def SVCMPHS_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide", [IsStreamingCompatible]>; +def SVCMPHI_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide", [IsStreamingCompatible]>; +def SVCMPLO_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide", [IsStreamingCompatible]>; +def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // While comparisons -def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile]>; -def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile]>; -def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; -def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; -def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; -def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; -def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; -def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; +def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", 
"Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Counting bit @@ -703,12 +703,12 @@ multiclass SInstCLS def _Z : SInst; } -defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; -defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">; -defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; +defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls", [IsStreamingCompatible]>; +defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz", [IsStreamingCompatible]>; +defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { - defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt">; + defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -763,13 +763,13 @@ def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftma def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">; def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">; -def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale">; -def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale">; -def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale">; +def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale", [IsStreamingCompatible]>; -def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale">; -def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, "aarch64_sve_fscale">; -def SVSCALE_N_Z : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeZero, "aarch64_sve_fscale">; +def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_N_Z : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeZero, "aarch64_sve_fscale", [IsStreamingCompatible]>; defm SVMAD_F 
: SInstZPZZZ<"svmad", "hfd", "aarch64_sve_fmad", "aarch64_sve_fmla_u", [ReverseMergeAnyAccOp]>; defm SVMLA_F : SInstZPZZZ<"svmla", "hfd", "aarch64_sve_fmla", "aarch64_sve_fmla_u">; @@ -780,42 +780,42 @@ defm SVNMLA_F : SInstZPZZZ<"svnmla", "hfd", "aarch64_sve_fnmla", "aarch64_sve_fn defm SVNMLS_F : SInstZPZZZ<"svnmls", "hfd", "aarch64_sve_fnmls", "aarch64_sve_fnmls_u">; defm SVNMSB_F : SInstZPZZZ<"svnmsb", "hfd", "aarch64_sve_fnmsb", "aarch64_sve_fnmls_u", [ReverseMergeAnyAccOp]>; -def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCADD_X : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeAny, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCADD_Z : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeZero, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_X : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeAny, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_Z : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeZero, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVRECPE : SInst<"svrecpe[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frecpe_x">; -def SVRECPS : 
SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frecps_x">; -def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x">; -def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x">; +def SVRECPE : SInst<"svrecpe[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frecpe_x", [IsStreamingCompatible]>; +def SVRECPS : SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frecps_x", [IsStreamingCompatible]>; +def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x", [IsStreamingCompatible]>; +def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point reductions -def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; -def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv">; -def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv">; -def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv">; -def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv">; -def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv">; +def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda", [IsStreamingCompatible]>; +def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv", [IsStreamingCompatible]>; +def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv", [IsStreamingCompatible]>; +def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv", [IsStreamingCompatible]>; +def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv", [IsStreamingCompatible]>; +def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point comparisons -def SVACGE : SInst<"svacge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge">; -def SVACGT : SInst<"svacgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt">; -def SVACLE : SInst<"svacle[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare]>; -def SVACLT : SInst<"svaclt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>; -def SVCMPUO : SInst<"svcmpuo[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpuo">; +def SVACGE : SInst<"svacge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [IsStreamingCompatible]>; +def SVACGT : SInst<"svacgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [IsStreamingCompatible]>; +def SVACLE : SInst<"svacle[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare, IsStreamingCompatible]>; +def SVACLT : SInst<"svaclt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPUO : SInst<"svcmpuo[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpuo", [IsStreamingCompatible]>; def SVACGE_N : SInst<"svacge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facge">; def SVACGT_N : SInst<"svacgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt">; @@ -823,19 +823,19 @@ def SVACLE_N : SInst<"svacle[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_ def SVACLT_N : SInst<"svaclt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>; def SVCMPUO_N : 
SInst<"svcmpuo[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpuo">; -def SVCMPEQ_F : SInst<"svcmpeq[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpeq">; -def SVCMPNE_F : SInst<"svcmpne[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpne">; -def SVCMPGE_F : SInst<"svcmpge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge">; -def SVCMPGT_F : SInst<"svcmpgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt">; -def SVCMPLE_F : SInst<"svcmple[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>; -def SVCMPLT_F : SInst<"svcmplt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>; +def SVCMPEQ_F : SInst<"svcmpeq[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpeq", [IsStreamingCompatible]>; +def SVCMPNE_F : SInst<"svcmpne[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpne", [IsStreamingCompatible]>; +def SVCMPGE_F : SInst<"svcmpge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [IsStreamingCompatible]>; +def SVCMPGT_F : SInst<"svcmpgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [IsStreamingCompatible]>; +def SVCMPLE_F : SInst<"svcmple[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_F : SInst<"svcmplt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare, IsStreamingCompatible]>; -def SVCMPEQ_F_N : SInst<"svcmpeq[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpeq">; -def SVCMPNE_F_N : SInst<"svcmpne[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpne">; -def SVCMPGE_F_N : SInst<"svcmpge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge">; -def SVCMPGT_F_N : SInst<"svcmpgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt">; -def SVCMPLE_F_N : SInst<"svcmple[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>; -def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>; +def SVCMPEQ_F_N : SInst<"svcmpeq[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpeq", [IsStreamingCompatible]>; +def SVCMPNE_F_N : SInst<"svcmpne[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpne", [IsStreamingCompatible]>; +def SVCMPGE_F_N : SInst<"svcmpge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [IsStreamingCompatible]>; +def SVCMPGT_F_N : SInst<"svcmpgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [IsStreamingCompatible]>; +def SVCMPLE_F_N : SInst<"svcmple[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point conversions @@ -843,16 +843,16 @@ def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sv multiclass SInstCvtMXZ< string name, string m_types, string xz_types, string types, string intrinsic, list flags = [IsOverloadNone]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; + def _M : SInst; + def _X : SInst; + def _Z : SInst; } multiclass SInstCvtMX flags = [IsOverloadNone]> { - def _M : SInst; - def _X : SInst; + def _M : SInst; + def _X : SInst; } // svcvt_s##_f16 @@ -866,7 +866,7 @@ defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aar let TargetGuard = "sve,bf16" in { defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">; - def SVCVTNT_BF16_F32 
: SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone]>; + def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, IsStreamingCompatible]>; } // svcvt_s##_f64 @@ -930,11 +930,11 @@ defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarc defm SVCVTX_F32 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">; -def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone]>; -def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone]>; +def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, IsStreamingCompatible]>; +def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, IsStreamingCompatible]>; // SVCVTNT_X : Implemented as macro by SveEmitter.cpp -def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone]>; +def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, IsStreamingCompatible]>; // SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp } @@ -943,9 +943,9 @@ def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6 // Permutations and selection multiclass SVEPerm { - def : SInst; + def : SInst; let TargetGuard = "sve,bf16" in { - def: SInst; + def: SInst; } } @@ -969,81 +969,81 @@ def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNo let TargetGuard = "sve,bf16" in { def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">; } -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [IsStreamingCompatible], [ImmCheck<2, ImmCheckExtract, 1>]>; defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">; defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">; -def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">; -def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; -def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; -def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [IsStreamingCompatible]>; +def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { - def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">; + def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl", [IsStreamingCompatible]>; } -def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; -def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; -def SVUNPKHI_S : 
SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; -def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi">; -def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo">; -def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo">; -def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1">; -def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2">; +def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi", [IsStreamingCompatible]>; +def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi", [IsStreamingCompatible]>; +def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo", [IsStreamingCompatible]>; +def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo", [IsStreamingCompatible]>; +def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { -def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; -def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev">; -def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel">; -def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice">; -def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1">; -def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2">; -def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1">; -def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2">; -} - -def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev">; -def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone]>; -def SVREV_B32 : SInst<"svrev_b32", "PP", "Pc", MergeNone, "aarch64_sve_rev_b32", [IsOverloadNone]>; -def SVREV_B64 : SInst<"svrev_b64", "PP", "Pc", MergeNone, "aarch64_sve_rev_b64", [IsOverloadNone]>; -def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">; -def SVTRN1_B8 : SInst<"svtrn1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn1">; -def SVTRN1_B16 : SInst<"svtrn1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b16", [IsOverloadNone]>; -def SVTRN1_B32 : SInst<"svtrn1_b32", "PPP", "Pc", MergeNone, 
"aarch64_sve_trn1_b32", [IsOverloadNone]>; -def SVTRN1_B64 : SInst<"svtrn1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b64", [IsOverloadNone]>; -def SVTRN2_B8 : SInst<"svtrn2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn2">; -def SVTRN2_B16 : SInst<"svtrn2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b16", [IsOverloadNone]>; -def SVTRN2_B32 : SInst<"svtrn2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b32", [IsOverloadNone]>; -def SVTRN2_B64 : SInst<"svtrn2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b64", [IsOverloadNone]>; -def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi">; -def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo">; -def SVUZP1_B8 : SInst<"svuzp1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1">; -def SVUZP1_B16 : SInst<"svuzp1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b16", [IsOverloadNone]>; -def SVUZP1_B32 : SInst<"svuzp1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b32", [IsOverloadNone]>; -def SVUZP1_B64 : SInst<"svuzp1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b64", [IsOverloadNone]>; -def SVUZP2_B8 : SInst<"svuzp2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2">; -def SVUZP2_B16 : SInst<"svuzp2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b16", [IsOverloadNone]>; -def SVUZP2_B32 : SInst<"svuzp2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b32", [IsOverloadNone]>; -def SVUZP2_B64 : SInst<"svuzp2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b64", [IsOverloadNone]>; -def SVZIP1_B8 : SInst<"svzip1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip1">; -def SVZIP1_B16 : SInst<"svzip1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b16", [IsOverloadNone]>; -def SVZIP1_B32 : SInst<"svzip1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b32", [IsOverloadNone]>; -def SVZIP1_B64 : SInst<"svzip1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b64", [IsOverloadNone]>; -def SVZIP2_B : SInst<"svzip2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip2">; -def SVZIP2_B16 : SInst<"svzip2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b16", [IsOverloadNone]>; -def SVZIP2_B32 : SInst<"svzip2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b32", [IsOverloadNone]>; -def SVZIP2_B64 : SInst<"svzip2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b64", [IsOverloadNone]>; +def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [IsStreamingCompatible], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [IsStreamingCompatible]>; +def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; +} + +def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVREV_B16 
: SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVREV_B32 : SInst<"svrev_b32", "PP", "Pc", MergeNone, "aarch64_sve_rev_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVREV_B64 : SInst<"svrev_b64", "PP", "Pc", MergeNone, "aarch64_sve_rev_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVTRN1_B8 : SInst<"svtrn1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN1_B16 : SInst<"svtrn1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVTRN1_B32 : SInst<"svtrn1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVTRN1_B64 : SInst<"svtrn1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn1_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVTRN2_B8 : SInst<"svtrn2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVTRN2_B16 : SInst<"svtrn2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVTRN2_B32 : SInst<"svtrn2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVTRN2_B64 : SInst<"svtrn2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_trn2_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi", [IsStreamingCompatible]>; +def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo", [IsStreamingCompatible]>; +def SVUZP1_B8 : SInst<"svuzp1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP1_B16 : SInst<"svuzp1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVUZP1_B32 : SInst<"svuzp1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVUZP1_B64 : SInst<"svuzp1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp1_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVUZP2_B8 : SInst<"svuzp2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVUZP2_B16 : SInst<"svuzp2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVUZP2_B32 : SInst<"svuzp2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVUZP2_B64 : SInst<"svuzp2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_uzp2_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP1_B8 : SInst<"svzip1_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP1_B16 : SInst<"svzip1_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP1_B32 : SInst<"svzip1_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP1_B64 : SInst<"svzip1_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip1_b64", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP2_B : SInst<"svzip2_b8", "PPP", "Pc", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; +def SVZIP2_B16 : SInst<"svzip2_b16", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b16", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP2_B32 : SInst<"svzip2_b32", "PPP", "Pc", MergeNone, "aarch64_sve_zip2_b32", [IsOverloadNone, IsStreamingCompatible]>; +def SVZIP2_B64 : SInst<"svzip2_b64", "PPP", "Pc", 
MergeNone, "aarch64_sve_zip2_b64", [IsOverloadNone, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Predicate creation -def SVPFALSE : SInst<"svpfalse[_b]", "Pv", "", MergeNone, "", [IsOverloadNone]>; +def SVPFALSE : SInst<"svpfalse[_b]", "Pv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; -def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; -def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; +def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsStreamingCompatible]>; +def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL, IsStreamingCompatible]>; def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "Pssssssssssssssss", "Pc", MergeNone>; def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "Pssssssss", "Ps", MergeNone>; @@ -1055,33 +1055,33 @@ def SVDUP_N_B : SInst<"svdup[_n]_{d}", "Ps", "PcPsPiPl", MergeNone>; //////////////////////////////////////////////////////////////////////////////// // Predicate operations -def SVAND_B_Z : SInst<"svand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_and_z">; -def SVBIC_B_Z : SInst<"svbic[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_bic_z">; -def SVEOR_B_Z : SInst<"sveor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_eor_z">; -def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion -def SVNAND_B_Z : SInst<"svnand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nand_z">; -def SVNOR_B_Z : SInst<"svnor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nor_z">; -def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion -def SVORN_B_Z : SInst<"svorn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orn_z">; -def SVORR_B_Z : SInst<"svorr[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orr_z">; - -def SVBRKA : SInst<"svbrka[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brka">; -def SVBRKA_Z : SInst<"svbrka[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brka_z">; -def SVBRKB : SInst<"svbrkb[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brkb">; -def SVBRKB_Z : SInst<"svbrkb[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brkb_z">; -def SVBRKN_Z : SInst<"svbrkn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkn_z">; -def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpa_z">; -def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z">; - -def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst">; -def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext">; +def SVAND_B_Z : SInst<"svand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_and_z", [IsStreamingCompatible]>; +def SVBIC_B_Z : SInst<"svbic[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_bic_z", [IsStreamingCompatible]>; +def SVEOR_B_Z : SInst<"sveor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_eor_z", [IsStreamingCompatible]>; +def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone, "", [IsStreamingCompatible]>; // Uses custom expansion +def SVNAND_B_Z : SInst<"svnand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nand_z", [IsStreamingCompatible]>; +def SVNOR_B_Z : SInst<"svnor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nor_z", [IsStreamingCompatible]>; +def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone, "", [IsStreamingCompatible]>; // Uses custom expansion +def SVORN_B_Z : SInst<"svorn[_b]_z", "PPPP", "Pc", 
MergeNone, "aarch64_sve_orn_z", [IsStreamingCompatible]>; +def SVORR_B_Z : SInst<"svorr[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orr_z", [IsStreamingCompatible]>; + +def SVBRKA : SInst<"svbrka[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brka", [IsStreamingCompatible]>; +def SVBRKA_Z : SInst<"svbrka[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brka_z", [IsStreamingCompatible]>; +def SVBRKB : SInst<"svbrkb[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brkb", [IsStreamingCompatible]>; +def SVBRKB_Z : SInst<"svbrkb[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brkb_z", [IsStreamingCompatible]>; +def SVBRKN_Z : SInst<"svbrkn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkn_z", [IsStreamingCompatible]>; +def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpa_z", [IsStreamingCompatible]>; +def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z", [IsStreamingCompatible]>; + +def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst", [IsStreamingCompatible]>; +def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Testing predicates -def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_any">; -def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first">; -def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last">; +def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_any", [IsStreamingCompatible]>; +def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first", [IsStreamingCompatible]>; +def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // FFR manipulation @@ -1094,21 +1094,21 @@ def SVWRFFR : SInst<"svwrffr", "vP", "Pc", MergeNone, "", [IsOverloadNone]>; //////////////////////////////////////////////////////////////////////////////// // Counting elements -def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", MergeNone, "aarch64_sve_cntb", [IsOverloadNone]>; -def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone]>; -def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone]>; -def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone]>; +def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", MergeNone, "aarch64_sve_cntb", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone, IsStreamingCompatible]>; -def SVCNTB : SInst<"svcntb", "nv", "", MergeNone, "aarch64_sve_cntb", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTH : SInst<"svcnth", "nv", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTB : SInst<"svcntb", "nv", "", MergeNone, 
"aarch64_sve_cntb", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTH : SInst<"svcnth", "nv", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; -def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp">; -def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; +def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [IsStreamingCompatible]>; +def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone, "", [IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { -def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone>; +def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone, "", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1125,20 +1125,20 @@ def UnsignedWord : sat_type<"U", "Ui">; def UnsignedDoubleWord : sat_type<"U", "Ul">; multiclass SInst_SAT1 { - def _N32 : SInst]>; - def _N64 : SInst]>; - def _N32_ALL : SInst]>; - def _N64_ALL : SInst]>; + def _N32 : SInst]>; + def _N64 : SInst]>; + def _N32_ALL : SInst]>; + def _N64_ALL : SInst]>; } multiclass SInst_SAT2 { - def "" : SInst]>; - def _ALL : SInst]>; + def "" : SInst]>; + def _ALL : SInst]>; - def _N32 : SInst]>; - def _N64 : SInst]>; - def _N32_ALL : SInst]>; - def _N64_ALL : SInst]>; + def _N32 : SInst]>; + def _N64 : SInst]>; + def _N32_ALL : SInst]>; + def _N64_ALL : SInst]>; } defm SVQDECB_S : SInst_SAT1<"svqdecb", "aarch64_sve_sqdecb", SignedByte>; @@ -1159,32 +1159,32 @@ defm SVQINCW_U : SInst_SAT2<"svqincw", "aarch64_sve_uqincw", UnsignedWord>; defm SVQINCD_S : SInst_SAT2<"svqincd", "aarch64_sve_sqincd", SignedDoubleWord>; defm SVQINCD_U : SInst_SAT2<"svqincd", "aarch64_sve_uqincd", UnsignedDoubleWord>; -def SVQDECP_S : SInst<"svqdecp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqdecp">; -def SVQDECP_U : SInst<"svqdecp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqdecp">; -def SVQINCP_S : SInst<"svqincp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqincp">; -def SVQINCP_U : SInst<"svqincp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqincp">; +def SVQDECP_S : SInst<"svqdecp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqdecp", [IsStreamingCompatible]>; +def SVQDECP_U : SInst<"svqdecp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqdecp", [IsStreamingCompatible]>; +def SVQINCP_S : SInst<"svqincp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqincp", [IsStreamingCompatible]>; +def SVQINCP_U : SInst<"svqincp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqincp", [IsStreamingCompatible]>; -def SVQDECP_N_S32 : SInst<"svqdecp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n32">; -def SVQDECP_N_S64 : SInst<"svqdecp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n64">; -def SVQDECP_N_U32 : SInst<"svqdecp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n32">; -def SVQDECP_N_U64 : SInst<"svqdecp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n64">; -def SVQINCP_N_S32 : SInst<"svqincp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n32">; -def SVQINCP_N_S64 : SInst<"svqincp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n64">; -def SVQINCP_N_U32 : 
SInst<"svqincp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n32">; -def SVQINCP_N_U64 : SInst<"svqincp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n64">; +def SVQDECP_N_S32 : SInst<"svqdecp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n32", [IsStreamingCompatible]>; +def SVQDECP_N_S64 : SInst<"svqdecp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n64", [IsStreamingCompatible]>; +def SVQDECP_N_U32 : SInst<"svqdecp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n32", [IsStreamingCompatible]>; +def SVQDECP_N_U64 : SInst<"svqdecp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n64", [IsStreamingCompatible]>; +def SVQINCP_N_S32 : SInst<"svqincp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n32", [IsStreamingCompatible]>; +def SVQINCP_N_S64 : SInst<"svqincp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n64", [IsStreamingCompatible]>; +def SVQINCP_N_U32 : SInst<"svqincp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n32", [IsStreamingCompatible]>; +def SVQINCP_N_U64 : SInst<"svqincp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n64", [IsStreamingCompatible]>; let TargetGuard = "sve,i8mm" in { def SVMLLA_S32 : SInst<"svmmla[_s32]", "ddqq","i", MergeNone, "aarch64_sve_smmla">; def SVMLLA_U32 : SInst<"svmmla[_u32]", "ddqq","Ui", MergeNone, "aarch64_sve_ummla">; def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_usmmla">; -def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot">; -def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot">; -def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>; -def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>; +def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot", [IsStreamingCompatible]>; +def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot", [IsStreamingCompatible]>; +def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, IsStreamingCompatible]>; +def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, IsStreamingCompatible]>; -def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; -def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } let TargetGuard = "sve,f32mm" in { @@ -1193,12 +1193,12 @@ def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla let TargetGuard = "sve,f64mm" in { def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">; -def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", 
MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">; +def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q", [IsStreamingCompatible]>; +def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q", [IsStreamingCompatible]>; +def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q", [IsStreamingCompatible]>; +def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q", [IsStreamingCompatible]>; +def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q", [IsStreamingCompatible]>; +def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q", [IsStreamingCompatible]>; } let TargetGuard = "sve,bf16,f64mm" in { @@ -1212,57 +1212,57 @@ def SVZIP2Q_BF16 : SInst<"svzip2q[_{d}]", "ddd", "b", MergeNone, "aarc //////////////////////////////////////////////////////////////////////////////// // Vector creation -def SVUNDEF_1 : SInst<"svundef_{d}", "dv", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; -def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; -def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; -def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; +def SVUNDEF_1 : SInst<"svundef_{d}", "dv", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; -def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; +def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { -def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef]>; -def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef]>; -def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef]>; -def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef]>; +def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; +def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef, 
IsStreamingCompatible]>; -def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate]>; +def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // Vector insertion and extraction -def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; +def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; -def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; let TargetGuard = "sve,bf16" in { -def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; +def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; -def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, 
"", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 WhileGE/GT let TargetGuard = "sve2" in { -def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile]>; -def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile]>; -def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile]>; -def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile]>; -def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile]>; -def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile]>; -def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; -def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; +def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1304,49 +1304,49 @@ multiclass SInstZPZxZ; -defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl">; -defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl">; -defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl">; -defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl">; -defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl">; -defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd">; -defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd">; - -def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba">; -def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, 
"aarch64_sve_uaba">; -def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh">; -def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh">; -def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah">; -def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; - -def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba">; -def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba">; -def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh">; -def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh">; -def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah">; -def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; - -def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQRDMLAH_LANE : SInst<"svqrdmlah_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlah_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQRDMLSH_LANE : SInst<"svqrdmlsh_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlsh_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; - -def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVRSHR_M_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_M_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeOp1, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_X_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_X_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeAny, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_Z_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_Z_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeZero, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSRA_S : SInst<"svrsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_srsra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSRA_U : SInst<"svrsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_ursra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSLI : SInst<"svsli[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sli", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVSRA_S : SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_ssra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSRA_U : SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSRI : SInst<"svsri[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sri", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; 
+defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", [IsStreamingCompatible]>; +defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [IsStreamingCompatible]>; +defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", [IsStreamingCompatible]>; +defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", [IsStreamingCompatible]>; +defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", [IsStreamingCompatible]>; +defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", [IsStreamingCompatible]>; +defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", [IsStreamingCompatible]>; +defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", [IsStreamingCompatible]>; + +def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [IsStreamingCompatible]>; +def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [IsStreamingCompatible]>; +def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh", [IsStreamingCompatible]>; +def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh", [IsStreamingCompatible]>; +def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah", [IsStreamingCompatible]>; +def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [IsStreamingCompatible]>; + +def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba", [IsStreamingCompatible]>; +def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [IsStreamingCompatible]>; +def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh", [IsStreamingCompatible]>; +def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh", [IsStreamingCompatible]>; +def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah", [IsStreamingCompatible]>; +def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [IsStreamingCompatible]>; + +def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMLAH_LANE : SInst<"svqrdmlah_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlah_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQRDMLSH_LANE : SInst<"svqrdmlsh_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlsh_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; + +def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVRSHR_M_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_srshr", 
[IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_M_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeOp1, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_X_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_srshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_X_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeAny, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_srshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeZero, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_S : SInst<"svrsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_srsra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_U : SInst<"svrsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_ursra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSLI : SInst<"svsli[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sli", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVSRA_S : SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_ssra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRA_U : SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRI : SInst<"svsri[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sri", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1358,29 +1358,29 @@ multiclass SInstPairwise -defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp">; -defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp">; -defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp">; -defm SVMAXP_F : SInstPairwise<"svmaxp", "hfd", "aarch64_sve_fmaxp">; -defm SVMAXP_S : SInstPairwise<"svmaxp", "csli", "aarch64_sve_smaxp">; -defm SVMAXP_U : SInstPairwise<"svmaxp", "UcUsUiUl", "aarch64_sve_umaxp">; -defm SVMINNMP : SInstPairwise<"svminnmp", "hfd", "aarch64_sve_fminnmp">; -defm SVMINP_F : SInstPairwise<"svminp", "hfd", "aarch64_sve_fminp">; -defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp">; -defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp">; +defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp", [IsStreamingCompatible]>; +defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp", [IsStreamingCompatible]>; +defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp", [IsStreamingCompatible]>; +defm SVMAXP_F : SInstPairwise<"svmaxp", "hfd", "aarch64_sve_fmaxp", [IsStreamingCompatible]>; +defm SVMAXP_S : SInstPairwise<"svmaxp", "csli", "aarch64_sve_smaxp", [IsStreamingCompatible]>; +defm SVMAXP_U : SInstPairwise<"svmaxp", "UcUsUiUl", "aarch64_sve_umaxp", [IsStreamingCompatible]>; +defm SVMINNMP : SInstPairwise<"svminnmp", "hfd", "aarch64_sve_fminnmp", [IsStreamingCompatible]>; +defm SVMINP_F : SInstPairwise<"svminp", "hfd", "aarch64_sve_fminp", [IsStreamingCompatible]>; +defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp", [IsStreamingCompatible]>; +defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [IsStreamingCompatible]>; }
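For context on the flag being added throughout these hunks: IsStreamingCompatible marks a builtin as remaining valid in SME streaming mode, so it may be called from streaming and streaming-compatible functions. A minimal usage sketch, not part of this patch; the function name is invented, and it assumes an SME-capable toolchain that accepts the SME ACLE __arm_streaming_compatible keyword and the SVE2 pairwise intrinsics flagged above:

// Pairwise add (svaddp, flagged IsStreamingCompatible above) used from a
// streaming-compatible function. Compile with something like:
//   clang -O2 -march=armv9-a+sve2+sme -c pairwise.c
#include <arm_sve.h>

svfloat32_t pairwise_sum(svfloat32_t a, svfloat32_t b)
    __arm_streaming_compatible {
  svbool_t pg = svptrue_b32();   // all-true predicate over 32-bit lanes
  // Merging form: inactive lanes would keep the value of 'a'; with an
  // all-true predicate every result lane is the sum of an adjacent pair.
  return svaddp_f32_m(pg, a, b);
}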
//////////////////////////////////////////////////////////////////////////////// // SVE2 - Widening pairwise arithmetic let TargetGuard = "sve2" in { -def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp">; -def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp">; -def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp">; +def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp", [IsStreamingCompatible]>; +def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp", [IsStreamingCompatible]>; +def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp", [IsStreamingCompatible]>; -def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp">; -def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp">; -def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp">; +def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp", [IsStreamingCompatible]>; +def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp", [IsStreamingCompatible]>; +def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1388,56 +1388,56 @@ def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_s // let TargetGuard = "sve2" in { -def SVBCAX : SInst<"svbcax[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; -def SVBSL : SInst<"svbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; -def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; -def SVBSL2N : SInst<"svbsl2n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; -def SVEOR3 : SInst<"sveor3[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; -def SVNBSL : SInst<"svnbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl">; - -def SVBCAX_N : SInst<"svbcax[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; -def SVBSL_N : SInst<"svbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; -def SVBSL1N_N : SInst<"svbsl1n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; -def SVBSL2N_N : SInst<"svbsl2n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; -def SVEOR3_N : SInst<"sveor3[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; -def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl">; -def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVBCAX : SInst<"svbcax[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [IsStreamingCompatible]>; +def SVBSL : SInst<"svbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [IsStreamingCompatible]>; +def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [IsStreamingCompatible]>; +def SVBSL2N : SInst<"svbsl2n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n", [IsStreamingCompatible]>; +def SVEOR3 : SInst<"sveor3[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3", [IsStreamingCompatible]>; +def SVNBSL : SInst<"svnbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, 
"aarch64_sve_nbsl", [IsStreamingCompatible]>; + +def SVBCAX_N : SInst<"svbcax[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [IsStreamingCompatible]>; +def SVBSL_N : SInst<"svbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [IsStreamingCompatible]>; +def SVBSL1N_N : SInst<"svbsl1n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [IsStreamingCompatible]>; +def SVBSL2N_N : SInst<"svbsl2n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n", [IsStreamingCompatible]>; +def SVEOR3_N : SInst<"sveor3[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3", [IsStreamingCompatible]>; +def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl", [IsStreamingCompatible]>; +def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Large integer arithmetic let TargetGuard = "sve2" in { -def SVADCLB : SInst<"svadclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclb">; -def SVADCLT : SInst<"svadclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclt">; -def SVSBCLB : SInst<"svsbclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclb">; -def SVSBCLT : SInst<"svsbclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclt">; +def SVADCLB : SInst<"svadclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclb", [IsStreamingCompatible]>; +def SVADCLT : SInst<"svadclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclt", [IsStreamingCompatible]>; +def SVSBCLB : SInst<"svsbclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclb", [IsStreamingCompatible]>; +def SVSBCLT : SInst<"svsbclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclt", [IsStreamingCompatible]>; -def SVADCLB_N : SInst<"svadclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclb">; -def SVADCLT_N : SInst<"svadclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclt">; -def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclb">; -def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt">; +def SVADCLB_N : SInst<"svadclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclb", [IsStreamingCompatible]>; +def SVADCLT_N : SInst<"svadclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclt", [IsStreamingCompatible]>; +def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclb", [IsStreamingCompatible]>; +def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Multiplication by indexed elements let TargetGuard = "sve2" in { -def SVMLA_LANE_2 : SInst<"svmla_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMLA_LANE_2 : SInst<"svmla_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, 
"aarch64_sve_mls_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Uniform complex integer arithmetic let TargetGuard = "sve2" in { -def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; -def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; -def SVCMLA : SInst<"svcmla[_{d}]", "ddddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [IsStreamingCompatible], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [IsStreamingCompatible], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVCMLA : SInst<"svcmla[_{d}]", "ddddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "ddddi", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "ddddi", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; } @@ -1445,18 +1445,18 @@ def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", // SVE2 - Widening DSP operations multiclass SInstWideDSPAcc { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } multiclass SInstWideDSPLong { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } multiclass SInstWideDSPWide { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } let TargetGuard = "sve2" in { @@ -1505,87 +1505,87 @@ defm SVSUBWB_U : SInstWideDSPWide<"svsubwb", "UsUiUl", "aarch64_sve_usubwb">; defm SVSUBWT_S : SInstWideDSPWide<"svsubwt", "sil", "aarch64_sve_ssubwt">; defm SVSUBWT_U : SInstWideDSPWide<"svsubwt", "UsUiUl", "aarch64_sve_usubwt">; -def SVSHLLB_S_N : SInst<"svshllb[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllb", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllb", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", 
MergeNone, "aarch64_sve_ushllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; - -def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone>; -def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone>; -def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone>; -def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone>; - -def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_S_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_U_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_S_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_U_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_S_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_U_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMULLB_S_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLB_U_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLT_S_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLT_U_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQDMLALB_LANE : SInst<"svqdmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLALT_LANE : SInst<"svqdmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLSLB_LANE : SInst<"svqdmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLSLT_LANE : SInst<"svqdmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMULLB_LANE : SInst<"svqdmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVSHLLB_S_N : SInst<"svshllb[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllt", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllt", 
[IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; + +def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone, "", [IsStreamingCompatible]>; + +def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_S_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_U_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_S_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_U_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_S_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_U_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMULLB_S_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLB_U_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLT_S_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLT_U_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQDMLALB_LANE : SInst<"svqdmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLALT_LANE : SInst<"svqdmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLSLB_LANE : SInst<"svqdmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLSLT_LANE : SInst<"svqdmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMULLB_LANE : SInst<"svqdmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; } 
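The widening DSP group above follows the same pattern. As a hedged illustration (again not from the patch, and the helper name is invented), one of the newly streaming-compatible widening multiply-accumulates, including its _lane form whose constant index is what the ImmCheckLaneIndex entries above validate:

#include <arm_sve.h>

// Widen-and-accumulate: products of the even (bottom) 32-bit lanes of a
// and b are widened to 64 bits and added to acc. The lane form instead
// multiplies by one 32-bit element of each 128-bit segment of b.
svint64_t widen_mla(svint64_t acc, svint32_t a, svint32_t b)
    __arm_streaming_compatible {
  acc = svmlalb_s64(acc, a, b);           // vector-by-vector form
  return svmlalb_lane_s64(acc, a, b, 1);  // index must be constant, 0..3 here
}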
//////////////////////////////////////////////////////////////////////////////// // SVE2 - Narrowing DSP operations let TargetGuard = "sve2" in { -def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb">; -def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt">; -def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb">; -def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt">; -def SVRSUBHNB : SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb">; -def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt">; -def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb">; -def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt">; - -def SVADDHNB_N : SInst<"svaddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_addhnb">; -def SVADDHNT_N : SInst<"svaddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_addhnt">; -def SVRADDHNB_N : SInst<"svraddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb">; -def SVRADDHNT_N : SInst<"svraddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt">; -def SVRSUBHNB_N : SInst<"svrsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb">; -def SVRSUBHNT_N : SInst<"svrsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt">; -def SVSUBHNB_N : SInst<"svsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_subhnb">; -def SVSUBHNT_N : SInst<"svsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_subhnt">; - -def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqshrunb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRNB_S : SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; - -def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [], [ImmCheck<2, 
ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [IsStreamingCompatible]>; +def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [IsStreamingCompatible]>; +def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [IsStreamingCompatible]>; +def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [IsStreamingCompatible]>; +def SVRSUBHNB : SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [IsStreamingCompatible]>; +def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [IsStreamingCompatible]>; +def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [IsStreamingCompatible]>; +def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [IsStreamingCompatible]>; + +def SVADDHNB_N : SInst<"svaddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [IsStreamingCompatible]>; +def SVADDHNT_N : SInst<"svaddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [IsStreamingCompatible]>; +def SVRADDHNB_N : SInst<"svraddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [IsStreamingCompatible]>; +def SVRADDHNT_N : SInst<"svraddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [IsStreamingCompatible]>; +def SVRSUBHNB_N : SInst<"svrsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [IsStreamingCompatible]>; +def SVRSUBHNT_N : SInst<"svrsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [IsStreamingCompatible]>; +def SVSUBHNB_N : SInst<"svsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [IsStreamingCompatible]>; +def SVSUBHNT_N : SInst<"svsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [IsStreamingCompatible]>; + +def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqshrunb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_S : SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [IsStreamingCompatible], 
[ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + +def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Unary narrowing operations let TargetGuard = "sve2" in { -def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb">; -def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb">; -def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb">; +def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb", [IsStreamingCompatible]>; +def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb", [IsStreamingCompatible]>; +def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb", [IsStreamingCompatible]>; -def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt">; -def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt">; -def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt">; +def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt", [IsStreamingCompatible]>; +def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt", [IsStreamingCompatible]>; +def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1726,18 +1726,18 @@ def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", " // SVE2 - Polynomial arithmetic let TargetGuard = "sve2" in { -def SVEORBT : SInst<"sveorbt[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; -def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; -def SVEORTB : SInst<"sveortb[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; -def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; 
-def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul">;
-def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul">;
-def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone>;
-def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone>;
-def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair">;
-def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair">;
-def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone>;
-def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone>;
+def SVEORBT : SInst<"sveorbt[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [IsStreamingCompatible]>;
+def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [IsStreamingCompatible]>;
+def SVEORTB : SInst<"sveortb[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [IsStreamingCompatible]>;
+def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [IsStreamingCompatible]>;
+def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul", [IsStreamingCompatible]>;
+def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul", [IsStreamingCompatible]>;
+def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone, "", [IsStreamingCompatible]>;
+def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone, "", [IsStreamingCompatible]>;
+def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [IsStreamingCompatible]>;
+def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [IsStreamingCompatible]>;
+def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone, "", [IsStreamingCompatible]>;
+def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone, "", [IsStreamingCompatible]>;
 def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">;
 def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">;
 }
@@ -1746,8 +1746,8 @@ def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", Mer
 // SVE2 - Complex integer dot product
 
 let TargetGuard = "sve2" in {
-def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>;
-def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [], [ImmCheck<4, ImmCheckComplexRotAll90>,
+def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>;
+def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>,
                                                                          ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
 }
 
@@ -1755,27 +1755,27 @@ def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch
 // SVE2 - Floating-point widening multiply-accumulate
 
 let TargetGuard = "sve2" in {
-def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb">;
-def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb">;
-def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt">;
-def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalt">;
-def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb">;
-def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb">;
-def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt">;
-def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt">;
-def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb", [IsStreamingCompatible]>;
+def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb", [IsStreamingCompatible]>;
+def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt", [IsStreamingCompatible]>;
+def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalt", [IsStreamingCompatible]>;
+def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb", [IsStreamingCompatible]>;
+def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb", [IsStreamingCompatible]>;
+def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt", [IsStreamingCompatible]>;
+def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt", [IsStreamingCompatible]>;
+def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Floating-point integer binary logarithm
 
 let TargetGuard = "sve2" in {
-def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb">;
-def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb">;
-def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb">;
+def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb", [IsStreamingCompatible]>;
+def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb", [IsStreamingCompatible]>;
+def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb", [IsStreamingCompatible]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1797,32 +1797,32 @@ def SVNMATCH : SInst<"svnmatch[_{d}]", "PPdd", "csUcUs", MergeNone, "aarch64_sve
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Contiguous conflict detection
 
 let TargetGuard = "sve2" in {
-def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW]>;
-def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>;
-def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW]>;
-def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW]>;
+def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW, IsStreamingCompatible]>;
 
-def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW]>;
-def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>;
-def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW]>;
-def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>;
+def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW, IsStreamingCompatible]>;
 }
 
 let TargetGuard = "sve2,bf16" in {
-def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>;
-def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>;
+def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, IsStreamingCompatible]>;
+def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, IsStreamingCompatible]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Extended table lookup/permute
 
 let TargetGuard = "sve2" in {
-def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone>;
-def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">;
+def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone, "", [IsStreamingCompatible]>;
+def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx", [IsStreamingCompatible]>;
 }
 
 let TargetGuard = "sve2,bf16" in {
-def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone>;
-def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx">;
+def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone, "", [IsStreamingCompatible]>;
+def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx", [IsStreamingCompatible]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1868,3 +1868,15 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
 }
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2
+
+// SME intrinsics which operate only on vectors and do not require ZA should be added here,
+// as they could possibly become SVE instructions in the future.
+
+let TargetGuard = "sme2" in {
+// == ADD (vectors) ==
+  def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
+  def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 74c9b9266771b02920e0a30b0020eae1c9ed5921..75c1f05e1f2ff8b37aa0e15626d6a0059ee73066 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -218,10 +218,11 @@ def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X
 def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
 def IsStreaming : FlagType<0x2000000000>;
 def IsStreamingCompatible : FlagType<0x4000000000>;
-def IsSharedZA : FlagType<0x8000000000>;
-def IsPreservesZA : FlagType<0x10000000000>;
-def IsReadZA : FlagType<0x20000000000>;
-def IsWriteZA : FlagType<0x40000000000>;
+def IsReadZA : FlagType<0x8000000000>;
+def IsWriteZA : FlagType<0x10000000000>;
+def IsInZA : FlagType<0x20000000000>;
+def IsOutZA : FlagType<0x40000000000>;
+def IsInOutZA : FlagType<0x80000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
   int Value = val;
 }
@@ -254,7 +255,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
 }
 
 class Inst<string n, string p, string t, MergeType mt, string i,
-           list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+           list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
   string Name = n;
   string Prototype = p;
   string Types = t;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 475dfe845528d9902ced70d97a31a49044a952b4..43e3a9d4d3983658dcc1eff3a08a3ba6a7e554c4 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -2790,6 +2790,13 @@ private:
   /// clang accepts as an extension.
   void DiagnoseCXX11AttributeExtension(ParsedAttributes &Attrs);
 
+  ExprResult ParseUnevaluatedStringInAttribute(const IdentifierInfo &AttrName);
+
+  bool
+  ParseAttributeArgumentList(const clang::IdentifierInfo &AttrName,
+                             SmallVectorImpl<Expr *> &Exprs,
+                             ParsedAttributeArgumentsProperties ArgsProperties);
+
   /// Parses syntax-generic attribute arguments for attributes which are
   /// known to the implementation, and adds them to the given ParsedAttributes
   /// list with the given attribute syntax. Returns the number of arguments
diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h
index 592580bccd234fbf447591fd50eae8a9311bd8b0..8c0edca1ebc5eeee613cbfb5e960d4f0e47d3ea2 100644
--- a/clang/include/clang/Sema/ParsedAttr.h
+++ b/clang/include/clang/Sema/ParsedAttr.h
@@ -24,6 +24,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/VersionTuple.h"
+#include <bitset>
 #include <cassert>
 #include <cstddef>
 #include <cstring>
@@ -911,6 +912,20 @@ private:
   VecTy AttrList;
 };
 
+struct ParsedAttributeArgumentsProperties {
+  ParsedAttributeArgumentsProperties(uint32_t StringLiteralBits)
+      : StringLiterals(StringLiteralBits) {}
+  bool isStringLiteralArg(unsigned I) const {
+    // If the last bit is set, assume we have a variadic parameter
+    if (I >= StringLiterals.size())
+      return StringLiterals.test(StringLiterals.size() - 1);
+    return StringLiterals.test(I);
+  }
+
+private:
+  std::bitset<32> StringLiterals;
+};
+
 /// ParsedAttributes - A collection of parsed attributes. Currently
 /// we don't differentiate between the various attribute syntaxes,
 /// which is basically silly.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b2ab6d0f8445728489e4c6423440a256d4e2b3f6..86cee3c728dc160fe5024cf54ae778cfee1cad70 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -7076,6 +7076,8 @@ public:
                                     NestedNameSpecInfo &IdInfo,
                                     bool EnteringContext);
 
+  bool IsInvalidSMECallConversion(QualType FromType, QualType ToType);
+
   /// The parser has parsed a nested-name-specifier
   /// 'template[opt] template-name < template-args >::'.
   ///
@@ -13604,6 +13606,9 @@ private:
                                     CallExpr *TheCall);
   bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool ParseSVEImmChecks(CallExpr *TheCall,
+                         SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
+  bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                    CallExpr *TheCall);
   bool CheckARMCoprocessorImmediate(const TargetInfo &TI, const Expr *CoprocArg,
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 99c859034423bbb7d54b318f40e4432984bf9c86..f0ab7b303541c4ddae417fcfa099cefcdc901344 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3389,12 +3389,19 @@ FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params,
     argSlot[i] = params[i];
   }
 
+  // Propagate the SME ACLE attributes.
+  if (epi.AArch64SMEAttributes != SME_NormalFunction) {
+    auto &ExtraBits = *getTrailingObjects<FunctionTypeExtraBitfields>();
+    assert(epi.AArch64SMEAttributes <= SME_AttributeMask &&
+           "Not enough bits to encode SME attributes");
+    ExtraBits.AArch64SMEAttributes = epi.AArch64SMEAttributes;
+  }
+
   // Fill in the exception type array if present.
   if (getExceptionSpecType() == EST_Dynamic) {
     auto &ExtraBits = *getTrailingObjects<FunctionTypeExtraBitfields>();
     size_t NumExceptions = epi.ExceptionSpec.Exceptions.size();
-    assert(NumExceptions <= UINT16_MAX &&
-           "Not enough bits to encode exceptions");
+    assert(NumExceptions <= 1023 && "Not enough bits to encode exceptions");
     ExtraBits.NumExceptionType = NumExceptions;
 
     assert(hasExtraBitfields() && "missing trailing extra bitfields!");
@@ -3551,8 +3558,11 @@ void FunctionProtoType::Profile(llvm::FoldingSetNodeID &ID, QualType Result,
   // This is followed by an optional "consumed argument" section of the
   // same length as the first type sequence:
   //      bool*
-  // Finally, we have the ext info and trailing return type flag:
-  //      int bool
+  // This is followed by the ext info:
+  //      int
+  // Finally we have a trailing return type flag (bool)
+  // combined with AArch64 SME Attributes, to save space:
+  //      int
   //
   // There is no ambiguity between the consumed arguments and an empty EH
   // spec because of the leading 'bool' which unambiguously indicates
@@ -3585,8 +3595,9 @@ void FunctionProtoType::Profile(llvm::FoldingSetNodeID &ID, QualType Result,
     for (unsigned i = 0; i != NumParams; ++i)
       ID.AddInteger(epi.ExtParameterInfos[i].getOpaqueValue());
   }
+
   epi.ExtInfo.Profile(ID);
-  ID.AddBoolean(epi.HasTrailingReturn);
+  ID.AddInteger((epi.AArch64SMEAttributes << 1) | epi.HasTrailingReturn);
 }
 
 void FunctionProtoType::Profile(llvm::FoldingSetNodeID &ID,
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 1b62f6630928c18bef1738bd446535e3eeed9750..31e6e56e7f693c99e6d51b21c5aafe0289732ba6 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -937,6 +937,20 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
   OS << ')';
 
   FunctionType::ExtInfo Info = T->getExtInfo();
+  unsigned SMEBits = T->getAArch64SMEAttributes();
+
+  if (SMEBits & FunctionType::SME_PStateSMCompatibleMask)
+    OS << " __arm_streaming_compatible";
+  if (SMEBits & FunctionType::SME_PStateSMEnabledMask)
+    OS << " __arm_streaming";
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves)
+    OS << " __arm_preserves(\"za\")";
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In)
+    OS << " __arm_in(\"za\")";
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out)
+    OS << " __arm_out(\"za\")";
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut)
+    OS << " __arm_inout(\"za\")";
 
   printFunctionAfter(Info, OS);
 
@@ -1772,6 +1786,10 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
     OS << "__arm_streaming";
     return;
   }
+  if (T->getAttrKind() == attr::ArmStreamingCompatible) {
+    OS << "__arm_streaming_compatible";
+    return;
+  }
 
   OS << " __attribute__((";
   switch (T->getAttrKind()) {
@@ -1814,6 +1832,11 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   case attr::AnnotateType:
   case attr::WebAssemblyFuncref:
   case attr::ArmStreaming:
+  case attr::ArmStreamingCompatible:
+  case attr::ArmIn:
+  case attr::ArmOut:
+  case attr::ArmInOut:
+  case attr::ArmPreserves:
     llvm_unreachable("This attribute should have been handled already");
 
   case attr::NSReturnsRetained:
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 0889262b0c9ce839c3c1cfcd958a324c30f288e7..619677c1c1a491bccd191040f53a43140b677dbd 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -374,6 +374,11 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");
 
+  // These macros are set when Clang can parse declarations with these
+  // attributes.
+  Builder.defineMacro("__ARM_STATE_ZA", "1");
+  Builder.defineMacro("__ARM_STATE_ZT0", "1");
+
   // 0xe implies support for half, single and double precision operations.
   if (FPU & FPUMode)
     Builder.defineMacro("__ARM_FP", "0xE");
@@ -418,6 +423,17 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasSVE2 && HasSVE2SM4)
     Builder.defineMacro("__ARM_FEATURE_SVE2_SM4", "1");
 
+  if (HasSME) {
+    Builder.defineMacro("__ARM_FEATURE_SME");
+    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+  }
+
+  if (HasSME2) {
+    Builder.defineMacro("__ARM_FEATURE_SME");
+    Builder.defineMacro("__ARM_FEATURE_SME2");
+    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
+  }
+
   if (HasCRC)
     Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
 
@@ -670,6 +686,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
       .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
       .Case("sme", HasSME)
+      .Case("sme2", HasSME2)
      .Case("sme-f64f64", HasSMEF64F64)
      .Case("sme-i16i64", HasSMEI16I64)
      .Case("sme-fa64", HasSMEFA64)
@@ -790,6 +807,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
+    if (Feature == "+sme2") {
+      HasSME = true;
+      HasSME2 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+    }
     if (Feature == "+sme-f64f64") {
       HasSME = true;
       HasSMEF64F64 = true;
@@ -1146,6 +1169,8 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
 }
 
 const char *const AArch64TargetInfo::GCCRegNames[] = {
+    // clang-format off
+
     // 32-bit Integer registers
     "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
     "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
@@ -1182,7 +1207,12 @@ const char *const AArch64TargetInfo::GCCRegNames[] = {
 
     // SVE predicate-as-counter registers
     "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8",
-    "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15"
+    "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+
+    // SME registers
+    "za", "zt0",
+
+    // clang-format on
 };
 
 ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
@@ -1302,8 +1332,15 @@ bool AArch64TargetInfo::validateAsmConstraint(
       Info.setAllowsRegister();
     return true;
   case 'U':
-    if (Name[1] == 'p' && (Name[2] == 'l' || Name[2] == 'a')) {
-      // SVE predicate registers ("Upa"=P0-15, "Upl"=P0-P7)
+    if (Name[1] == 'p' &&
+        (Name[2] == 'l' || Name[2] == 'a' || Name[2] == 'h')) {
+      // SVE predicate registers ("Upa"=P0-15, "Upl"=P0-P7, "Uph"=P8-P15)
+      Info.setAllowsRegister();
+      Name += 2;
+      return true;
+    }
+    if (Name[1] == 'c' && (Name[2] == 'i' || Name[2] == 'j')) {
+      // Gpr registers ("Uci"=w8-11, "Ucj"=w12-15)
       Info.setAllowsRegister();
       Name += 2;
       return true;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 76d71aca76bd2276fd4489f36a7d26ea015ee77a..77040949fcbec24dfbb4bd3c0adec00f674ac250 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasCCDP = false;
   bool HasFRInt3264 = false;
   bool HasSME = false;
+  bool HasSME2 = false;
   bool HasSMEF64F64 = false;
   bool HasSMEI16I64 = false;
   bool HasSB = false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8f87c4d461090ef96d36b23f5f8caccfc6af48f5..eff67c6ff7cc92024687f8a0460ec5aa4a86a129 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9126,13 +9126,6 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
   auto *OverloadedTy =
       llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
 
-  // At the ACLE level there's only one predicate type, svbool_t, which is
-  // mapped to <vscale x 16 x i1>. However, this might be incompatible with the
-  // actual type being loaded. For example, when loading doubles (i64) the
-  // predicated should be <vscale x 2 x i1> instead. At the IR level the type of
-  // the predicate and the data being loaded must match. Cast accordingly.
-  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
-
   Function *F = nullptr;
   if (Ops[1]->getType()->isVectorTy())
     // This is the "vector base, scalar offset" case. In order to uniquely
@@ -9146,6 +9139,16 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
     // intrinsic.
     F = CGM.getIntrinsic(IntID, OverloadedTy);
 
+  // At the ACLE level there's only one predicate type, svbool_t, which is
+  // mapped to <vscale x 16 x i1>. However, this might be incompatible with the
+  // actual type being loaded. For example, when loading doubles (i64) the
+  // predicate should be <vscale x 2 x i1> instead. At the IR level the type of
+  // the predicate and the data being loaded must match. Cast to the type
+  // expected by the intrinsic. The intrinsic itself should be defined in
+  // a way that enforces relations between parameter types.
+  Ops[0] = EmitSVEPredicateCast(
+      Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
+
   // Pass 0 when the offset is missing. This can only be applied when using
   // the "vector base" addressing mode for which ACLE allows no offset. The
   // corresponding LLVM IR always requires an offset.
@@ -9210,8 +9213,11 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
   // mapped to <vscale x 16 x i1>. However, this might be incompatible with the
   // actual type being stored. For example, when storing doubles (i64) the
   // predicated should be <vscale x 2 x i1> instead. At the IR level the type of
-  // the predicate and the data being stored must match. Cast accordingly.
-  Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+  // the predicate and the data being stored must match. Cast to the type
+  // expected by the intrinsic. The intrinsic itself should be defined in
+  // a way that enforces relations between parameter types.
+  Ops[1] = EmitSVEPredicateCast(
+      Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
 
   // For "vector base, scalar index" scale the index so that it becomes a
   // scalar offset.
@@ -9327,23 +9333,19 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
 
   // Does the store have an offset?
-  if (Ops.size() > 3)
+  if (Ops.size() > (2 + N))
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
 
   BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
-  Value *Val = Ops.back();
-
+
   // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
   // need to break up the tuple vector.
   SmallVector<llvm::Value *, 5> Operands;
-  unsigned MinElts = VTy->getElementCount().getKnownMinValue();
-  for (unsigned I = 0; I < N; ++I) {
-    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
-    Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
-  }
+  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
+    Operands.push_back(Ops[I]);
   Operands.append({Predicate, BasePtr});
-
   Function *F = CGM.getIntrinsic(IntID, { VTy });
+
   return Builder.CreateCall(F, Operands);
 }
 
@@ -9456,59 +9458,49 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
   return Store;
 }
 
-Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
-  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
-  return Builder.CreateAdd(Base, CastOffset, "tileslice");
-}
-
-Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
                                       SmallVectorImpl<Value *> &Ops,
                                       unsigned IntID) {
-  Ops[3] = EmitSVEPredicateCast(
-      Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
+  Ops[2] = EmitSVEPredicateCast(
+      Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
 
   SmallVector<Value *> NewOps;
-  NewOps.push_back(Ops[3]);
+  NewOps.push_back(Ops[2]);
 
-  llvm::Value *BasePtr = Ops[4];
+  llvm::Value *BasePtr = Ops[3];
   // If the intrinsic contains the vnum parameter, multiply it with the vector
   // size in bytes.
-  if (Ops.size() == 6) {
+  if (Ops.size() == 5) {
     Function *StreamingVectorLength =
         CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
     llvm::Value *StreamingVectorLengthCall =
         Builder.CreateCall(StreamingVectorLength);
     llvm::Value *Mulvl =
-        Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
+        Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
     // The type of the ptr parameter is void *, so use Int8Ty here.
-    BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
+    BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
   }
   NewOps.push_back(BasePtr);
   NewOps.push_back(Ops[0]);
-  NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
+  NewOps.push_back(Ops[1]);
   Function *F = CGM.getIntrinsic(IntID);
   return Builder.CreateCall(F, NewOps);
 }
 
-Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
                                          SmallVectorImpl<Value *> &Ops,
                                          unsigned IntID) {
   auto *VecTy = getSVEType(TypeFlags);
   Function *F = CGM.getIntrinsic(IntID, VecTy);
-  if (TypeFlags.isReadZA()) {
+  if (TypeFlags.isReadZA())
     Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
-    Ops[3] = EmitTileslice(Ops[4], Ops[3]);
-    Ops.erase(&Ops[4]);
-  } else if (TypeFlags.isWriteZA()) {
-    Ops[1] = EmitTileslice(Ops[2], Ops[1]);
-    Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy);
-    Ops.erase(&Ops[3]);
-  }
+  else if (TypeFlags.isWriteZA())
+    Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
   return Builder.CreateCall(F, Ops);
 }
 
-Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
                                     SmallVectorImpl<Value *> &Ops,
                                     unsigned IntID) {
   // svzero_za() intrinsic zeros the entire za tile and has no parameters.
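A brief aside on what the EmitSMELd1St1 and EmitSMEZero paths above lower. This sketch is illustrative only and is not part of the patch; it assumes the svzero_za and svld1_hor_za8 intrinsics from arm_sme.h with their documented ACLE signatures:

    #include <arm_sme.h>

    // Zero all of ZA, then load one horizontal slice of tile 0.
    // svzero_za() maps to llvm.aarch64.sme.zero; svld1_hor_za8 maps to
    // llvm.aarch64.sme.ld1b.horiz. With the EmitTileslice helper removed,
    // the slice index is now passed through to the intrinsic unchanged.
    void kernel(const void *p, svbool_t pg, uint32_t slice)
        __arm_streaming __arm_inout("za") {
      svzero_za();
      svld1_hor_za8(0, slice, pg, p);
    }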
@@ -9518,18 +9510,13 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
   return Builder.CreateCall(F, Ops);
 }
 
-Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
                                       SmallVectorImpl<Value *> &Ops,
                                       unsigned IntID) {
-  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-  llvm::Value *MulVL = Builder.CreateMul(
-      CntsbCall,
-      Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
-      "mulvl");
-  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
-  Ops[0] = EmitTileslice(Ops[1], Ops[0]);
-  Ops.erase(&Ops[1]);
+  if (Ops.size() == 2)
+    Ops.push_back(Builder.getInt32(0));
+  else
+    Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
@@ -9622,26 +9609,24 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
   return Call;
 }
 
-Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
-                                                  const CallExpr *E) {
+void CodeGenFunction::GetAArch64SVEProcessedOperands(
+    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
+    SVETypeFlags TypeFlags) {
   // Find out if any arguments are required to be integer constant expressions.
   unsigned ICEArguments = 0;
   ASTContext::GetBuiltinTypeError Error;
   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   assert(Error == ASTContext::GE_None && "Should not codegen an error");
 
-  llvm::Type *Ty = ConvertType(E->getType());
-  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
-      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
-    Value *Val = EmitScalarExpr(E->getArg(0));
-    return EmitSVEReinterpret(Val, Ty);
-  }
+  // Tuple set/get only requires one insert/extract vector, which is
+  // created by EmitSVETupleSetOrGet.
+  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
 
-  llvm::SmallVector<Value *, 4> Ops;
   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    else {
+    bool IsICE = ICEArguments & (1 << i);
+    Value *Arg = EmitScalarExpr(E->getArg(i));
+
+    if (IsICE) {
       // If this is required to be a constant, constant fold it so that we know
       // that the generated intrinsic gets a ConstantInt.
       std::optional<llvm::APSInt> Result =
@@ -9653,12 +9638,49 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
       // immediate requires more than a handful of bits.
       *Result = Result->extOrTrunc(32);
       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+      continue;
+    }
+
+    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
+      Ops.push_back(Arg);
+      continue;
+    }
+
+    auto *VTy = cast<ScalableVectorType>(Arg->getType());
+    unsigned MinElts = VTy->getMinNumElements();
+    bool IsPred = VTy->getElementType()->isIntegerTy(1);
+    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
+
+    if (N == 1) {
+      Ops.push_back(Arg);
+      continue;
+    }
+
+    for (unsigned I = 0; I < N; ++I) {
+      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
+      auto *NewVTy =
+          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
+      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
     }
   }
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E) {
+  llvm::Type *Ty = ConvertType(E->getType());
+  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    return EmitSVEReinterpret(Val, Ty);
+  }
 
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
                                               AArch64SVEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad())
     return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
                              TypeFlags.isZExtReturn());
@@ -9672,14 +9694,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
     return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isGatherPrefetch())
     return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructLoad())
-    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (TypeFlags.isStructStore())
-    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructLoad())
+    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isStructStore())
+    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
-    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
   else if (TypeFlags.isTupleCreate())
-    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
+    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
   else if (TypeFlags.isUndef())
     return UndefValue::get(Ty);
   else if (Builtin->LLVMIntrinsic != 0) {
@@ -9898,13 +9920,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   case SVE::BI__builtin_sve_svtbl2_f64: {
     SVETypeFlags TF(Builtin->TypeModifier);
     auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
-    Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
-                                            ConstantInt::get(CGM.Int64Ty, 0));
-    unsigned MinElts = VTy->getMinNumElements();
-    Value *V1 = Builder.CreateExtractVector(
-        VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
-    return Builder.CreateCall(F, {V0, V1, Ops[1]});
+    return Builder.CreateCall(F, Ops);
   }
 
   case SVE::BI__builtin_sve_svset_neonq_s8:
@@ -9962,35 +9979,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
 
 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
-  // Find out if any arguments are required to be integer constant expressions.
-  unsigned ICEArguments = 0;
-  ASTContext::GetBuiltinTypeError Error;
-  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
-  assert(Error == ASTContext::GE_None && "Should not codegen an error");
-
-  llvm::Type *Ty = ConvertType(E->getType());
-  llvm::SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if ((ICEArguments & (1 << i)) == 0)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    else {
-      // If this is required to be a constant, constant fold it so that we know
-      // that the generated intrinsic gets a ConstantInt.
-      std::optional<llvm::APSInt> Result =
-          E->getArg(i)->getIntegerConstantExpr(getContext());
-      assert(Result && "Expected argument to be a constant");
-
-      // Immediates for SVE llvm intrinsics are always 32bit. We can safely
-      // truncate because the immediate has been range checked and no valid
-      // immediate requires more than a handful of bits.
-      *Result = Result->extOrTrunc(32);
-      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
-    }
-  }
-
   auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                               AArch64SMEIntrinsicsProvenSorted);
+
+  llvm::SmallVector<Value *, 4> Ops;
   SVETypeFlags TypeFlags(Builtin->TypeModifier);
+  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
   if (TypeFlags.isLoad() || TypeFlags.isStore())
     return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
@@ -9999,23 +9994,28 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
            BuiltinID == SME::BI__builtin_sme_svzero_za)
     return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
-           BuiltinID == SME::BI__builtin_sme_svstr_vnum_za)
+           BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
+           BuiltinID == SME::BI__builtin_sme_svldr_za ||
+           BuiltinID == SME::BI__builtin_sme_svstr_za)
     return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
-  else if (Builtin->LLVMIntrinsic != 0) {
-    // Predicates must match the main datatype.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ops[i]->getType()))
-        if (PredTy->getElementType()->isIntegerTy(1))
-          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
 
-    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
-                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
-    Value *Call = Builder.CreateCall(F, Ops);
-    return Call;
-  }
+  // Should not happen!
+  if (Builtin->LLVMIntrinsic == 0)
+    return nullptr;
 
-  /// Should not happen
-  return nullptr;
+  // Predicates must match the main datatype.
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ops[i]->getType()))
+      if (PredTy->getElementType()->isIntegerTy(1))
+        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
+
+  Function *F =
+      TypeFlags.isOverloadNone()
+          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
+          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
+  Value *Call = Builder.CreateCall(F, Ops);
+
+  return Call;
 }
 
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
@@ -10062,6 +10062,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
+    // Create call to __arm_sme_state and store the results to the two pointers.
+    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
+        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
+                                false),
+        "__arm_sme_state"));
+    auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
+                                                "aarch64_pstate_sm_compatible");
+    CI->setAttributes(Attrs);
+    CI->setCallingConv(
+        llvm::CallingConv::
+            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
+    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
+                        EmitPointerWithAlignment(E->getArg(0)));
+    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
+                               EmitPointerWithAlignment(E->getArg(1)));
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
     assert((getContext().getTypeSize(E->getType()) == 32) &&
            "rbit of unusual size!");
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 6b8af9bf18c1fffb02365f527c57f7f7bba50b16..bb36dcb42aa7be8a8e9dd9d0232fb0500cba7972 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1762,6 +1762,22 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
   if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
       FPT->isNothrow())
     FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+  unsigned SMEBits = FPT->getAArch64SMEAttributes();
+  if (SMEBits & FunctionType::SME_PStateSMEnabledMask)
+    FuncAttrs.addAttribute("aarch64_pstate_sm_enabled");
+  if (SMEBits & FunctionType::SME_PStateSMCompatibleMask)
+    FuncAttrs.addAttribute("aarch64_pstate_sm_compatible");
+
+  // ZA
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves)
+    FuncAttrs.addAttribute("aarch64_preserves_za");
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In)
+    FuncAttrs.addAttribute("aarch64_in_za");
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out)
+    FuncAttrs.addAttribute("aarch64_out_za");
+  if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut)
+    FuncAttrs.addAttribute("aarch64_inout_za");
 }
 
 static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs,
@@ -2402,6 +2418,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
                                llvm::toStringRef(CodeGenOpts.UniformWGSize));
       }
     }
+
+    if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
+      FuncAttrs.addAttribute("aarch64_pstate_sm_body");
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 143e0707b9429ef46ea4b58a8e0aa7f216094e74..0553b4651fe9186b5a6e68ceaac517deff9f91e9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4272,7 +4272,6 @@ public:
   llvm::Value *EmitSVEMaskedStore(const CallExpr *,
                                   SmallVectorImpl<llvm::Value *> &Ops,
                                   unsigned BuiltinID);
-  llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base);
   llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
                                    SmallVectorImpl<llvm::Value *> &Ops,
                                    unsigned BuiltinID);
@@ -4287,18 +4286,23 @@ public:
                                 unsigned IntID);
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
-  llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
+  llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
                              llvm::SmallVectorImpl<llvm::Value *> &Ops,
                              unsigned IntID);
-  llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags,
+  llvm::Value *EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
                                 llvm::SmallVectorImpl<llvm::Value *> &Ops,
                                 unsigned IntID);
-  llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags,
+  llvm::Value *EmitSMEZero(const SVETypeFlags &TypeFlags,
                            llvm::SmallVectorImpl<llvm::Value *> &Ops,
                            unsigned IntID);
-  llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags,
+  llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags,
                              llvm::SmallVectorImpl<llvm::Value *> &Ops,
                              unsigned IntID);
+
+  void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
+                                      SmallVectorImpl<llvm::Value *> &Ops,
+                                      SVETypeFlags TypeFlags);
+
   llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f09d1129b128a35b75eb2479de23bee88a953b3c..e353a649f9511d789c135d8586e21d9bd05b81d2 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2288,6 +2288,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     return;
   }
 
+  // Handle SME attributes that apply to function definitions,
+  // rather than to function prototypes.
+  if (D->hasAttr<ArmLocallyStreamingAttr>())
+    B.addAttribute("aarch64_pstate_sm_body");
+
+  if (auto *Attr = D->getAttr<ArmNewAttr>()) {
+    if (Attr->isNewZA())
+      B.addAttribute("aarch64_new_za");
+  }
+
   // Track whether we need to add the optnone LLVM attribute,
   // starting with the default for this optimization level.
   bool ShouldAddOptNone =
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 561110ff8c0d8d500bc63a3e46e0defbe4a5261e..cca1aea5dc6706d2e60317e5acaca143a25a86e9 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -8,6 +8,7 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -151,6 +152,11 @@ public:
     }
     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
   }
+
+  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+                            const FunctionDecl *Caller,
+                            const FunctionDecl *Callee,
+                            const CallArgList &Args) const override;
 };
 
 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
@@ -811,6 +817,43 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                           /*allowHigherAlign*/ false);
 }
 
+static bool isStreaming(const FunctionDecl *F) {
+  if (F->hasAttr<ArmLocallyStreamingAttr>())
+    return true;
+  if (const auto *T = F->getType()->getAs<FunctionProtoType>())
+    return T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask;
+  return false;
+}
+
+static bool isStreamingCompatible(const FunctionDecl *F) {
+  if (const auto *T = F->getType()->getAs<FunctionProtoType>())
+    return T->getAArch64SMEAttributes() &
+           FunctionType::SME_PStateSMCompatibleMask;
+  return false;
+}
+
+void AArch64TargetCodeGenInfo::checkFunctionCallABI(
+    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+    const FunctionDecl *Callee, const CallArgList &Args) const {
+  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
+    return;
+
+  bool CallerIsStreaming = isStreaming(Caller);
+  bool CalleeIsStreaming = isStreaming(Callee);
+  bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
+  bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
+
+  if (!CalleeIsStreamingCompatible &&
+      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
+    CGM.getDiags().Report(CallLoc,
+                          diag::err_function_always_inline_attribute_mismatch)
+        << Caller->getDeclName() << Callee->getDeclName() << "streaming";
+  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
+    if (NewAttr->isNewZA())
+      CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
+          << Callee->getDeclName();
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 356009ae9157c9231af761b8a78e9b85628d06fb..0477cad066b66c3c316539ca8874332e00279851 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -362,8 +362,8 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
   clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
   # Generate arm_sve.h
   clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h)
-  # Generate arm_sme_draft_spec_subject_to_change.h
-  clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme_draft_spec_subject_to_change.h)
+  # Generate arm_sme.h
+  clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme.h)
   # Generate arm_bf16.h
   clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h)
   # Generate arm_mve.h
@@ -384,7 +384,7 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
   list(APPEND aarch64_only_generated_files
     "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h"
-    "${CMAKE_CURRENT_BINARY_DIR}/arm_sme_draft_spec_subject_to_change.h"
+    "${CMAKE_CURRENT_BINARY_DIR}/arm_sme.h"
     "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h"
   )
 endif()
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index cf1e3a94de7fdddb4002157bf112daf068e98462..31a71be8a0f5971fb85f2ae11561686f7a834711 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -288,6 +288,16 @@ static bool attributeHasIdentifierArg(const IdentifierInfo &II) {
 #undef CLANG_ATTR_IDENTIFIER_ARG_LIST
 }
 
+/// Determine whether the given attribute has string literal arguments.
+static ParsedAttributeArgumentsProperties
+attributeStringLiteralListArg(const IdentifierInfo &II) {
+#define CLANG_ATTR_STRING_LITERAL_ARG_LIST
+  return llvm::StringSwitch<uint32_t>(normalizeAttrName(II.getName()))
+#include "clang/Parse/AttrParserStringSwitches.inc"
+      .Default(0);
+#undef CLANG_ATTR_STRING_LITERAL_ARG_LIST
+}
+
 /// Determine whether the given attribute has a variadic identifier argument.
 static bool attributeHasVariadicIdentifierArg(const IdentifierInfo &II) {
 #define CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST
@@ -371,6 +381,81 @@ void Parser::ParseAttributeWithTypeArg(IdentifierInfo &AttrName,
                ScopeName, ScopeLoc, nullptr, 0, Form);
 }
 
+ExprResult
+Parser::ParseUnevaluatedStringInAttribute(const IdentifierInfo &AttrName) {
+  if (Tok.is(tok::l_paren)) {
+    BalancedDelimiterTracker Paren(*this, tok::l_paren);
+    Paren.consumeOpen();
+    ExprResult Res = ParseUnevaluatedStringInAttribute(AttrName);
+    Paren.consumeClose();
+    return Res;
+  }
+  if (!isTokenStringLiteral()) {
+    Diag(Tok.getLocation(), diag::err_expected_string_literal)
+        << /*in attribute...*/ 4 << AttrName.getName();
+    return ExprError();
+  }
+  return ParseUnevaluatedStringLiteralExpression();
+}
+
+bool Parser::ParseAttributeArgumentList(
+    const IdentifierInfo &AttrName, SmallVectorImpl<Expr *> &Exprs,
+    ParsedAttributeArgumentsProperties ArgsProperties) {
+  bool SawError = false;
+  unsigned Arg = 0;
+  while (true) {
+    ExprResult Expr;
+    if (ArgsProperties.isStringLiteralArg(Arg)) {
+      Expr = ParseUnevaluatedStringInAttribute(AttrName);
+    } else if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
+      Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
+      Expr = ParseBraceInitializer();
+    } else {
+      Expr = ParseAssignmentExpression();
+    }
+    Expr = Actions.CorrectDelayedTyposInExpr(Expr);
+
+    if (Tok.is(tok::ellipsis))
+      Expr = Actions.ActOnPackExpansion(Expr.get(), ConsumeToken());
+    else if (Tok.is(tok::code_completion)) {
+      // There's nothing to suggest in here as we parsed a full expression.
+      // Instead fail and propagate the error since caller might have something
+      // to suggest, e.g. signature help in function call. Note that this is
+      // performed before pushing the \p Expr, so that signature help can report
+      // current argument correctly.
+      SawError = true;
+      cutOffParsing();
+      break;
+    }
+
+    if (Expr.isInvalid()) {
+      SawError = true;
+      break;
+    }
+
+    Exprs.push_back(Expr.get());
+
+    if (Tok.isNot(tok::comma))
+      break;
+    // Move to the next argument, remember where the comma was.
+    Token Comma = Tok;
+    ConsumeToken();
+    checkPotentialAngleBracketDelimiter(Comma);
+    Arg++;
+  }
+
+  if (SawError) {
+    // Ensure typos get diagnosed when errors were encountered while parsing the
+    // expression list.
+    for (auto &E : Exprs) {
+      ExprResult Expr = Actions.CorrectDelayedTyposInExpr(E);
+      if (Expr.isUsable())
+        E = Expr.get();
+    }
+  }
+  return SawError;
+}
+
 unsigned Parser::ParseAttributeArgsCommon(
     IdentifierInfo *AttrName, SourceLocation AttrNameLoc,
     ParsedAttributes &Attrs, SourceLocation *EndLoc, IdentifierInfo *ScopeName,
@@ -463,9 +548,9 @@ unsigned Parser::ParseAttributeArgsCommon(
             : Sema::ExpressionEvaluationContext::ConstantEvaluated);
 
     ExprVector ParsedExprs;
-    if (ParseExpressionList(ParsedExprs, llvm::function_ref<void()>(),
-                            /*FailImmediatelyOnInvalidExpr=*/true,
-                            /*EarlyTypoCorrection=*/true)) {
+    ParsedAttributeArgumentsProperties ArgProperties =
+        attributeStringLiteralListArg(*AttrName);
+    if (ParseAttributeArgumentList(*AttrName, ParsedExprs, ArgProperties)) {
       SkipUntil(tok::r_paren, StopAtSemi);
       return 0;
     }
@@ -6701,7 +6786,13 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
     } else if (Tok.isRegularKeywordAttribute()) {
       // For consistency with attribute parsing.
       Diag(Tok, diag::err_keyword_not_allowed) << Tok.getIdentifierInfo();
+      bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind());
       ConsumeToken();
+      if (TakesArgs) {
+        BalancedDelimiterTracker T(*this, tok::l_paren);
+        if (!T.consumeOpen())
+          T.skipToEnd();
+      }
     } else if (Tok.is(tok::kw_requires) && D.hasGroupingParens()) {
       // This declarator is declaring a function, but the requires clause is
      // in the wrong place:
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index d9ff6c42c502cee75c958c1fa68627242d870f65..d5c09e943f6a3d51c7e0f49d84884898ed346ede 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -1887,7 +1887,13 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
       if (!SkipUntil(tok::r_paren, StopAtSemi))
         break;
     } else if (Tok.isRegularKeywordAttribute()) {
+      bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind());
       ConsumeToken();
+      if (TakesArgs) {
+        BalancedDelimiterTracker T(*this, tok::l_paren);
+        if (!T.consumeOpen())
+          T.skipToEnd();
+      }
     } else {
       break;
     }
@@ -4510,8 +4516,18 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs,
   if (Tok.isRegularKeywordAttribute()) {
     SourceLocation Loc = Tok.getLocation();
     IdentifierInfo *AttrName = Tok.getIdentifierInfo();
-    Attrs.addNew(AttrName, Loc, nullptr, Loc, nullptr, 0, Tok.getKind());
+    ParsedAttr::Form Form = ParsedAttr::Form(Tok.getKind());
+    bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind());
     ConsumeToken();
+    if (TakesArgs) {
+      if (!Tok.is(tok::l_paren))
+        Diag(Tok.getLocation(), diag::err_expected_lparen_after) << AttrName;
+      else
+        ParseAttributeArgsCommon(AttrName, Loc, Attrs, EndLoc,
+                                 /*ScopeName*/ nullptr,
+                                 /*ScopeLoc*/ Loc, Form);
+    } else
+      Attrs.addNew(AttrName, Loc, nullptr, Loc, nullptr, 0, Form);
     return;
   }
 
@@ -4677,11 +4693,13 @@ SourceLocation Parser::SkipCXX11Attributes() {
     T.consumeOpen();
     T.skipToEnd();
     EndLoc = T.getCloseLocation();
-  } else if (Tok.isRegularKeywordAttribute()) {
+  } else if (Tok.isRegularKeywordAttribute() &&
+             !doesKeywordAttributeTakeArgs(Tok.getKind())) {
     EndLoc = Tok.getLocation();
     ConsumeToken();
   } else {
-    assert(Tok.is(tok::kw_alignas) && "not an attribute specifier");
+    assert((Tok.is(tok::kw_alignas) || Tok.isRegularKeywordAttribute()) &&
+           "not an attribute specifier");
     ConsumeToken();
     BalancedDelimiterTracker T(*this, tok::l_paren);
     if (!T.consumeOpen())
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 75d04824d8b99eef0d280f746ac5b5b0bdb8c267..3438ab3ed9aa8d9b937194a2af7f05dc897aa96e 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -3504,7 +3504,7 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs,
       Expr = Actions.ActOnPackExpansion(Expr.get(), ConsumeToken());
     else if (Tok.is(tok::code_completion)) {
       // There's nothing to suggest in here as we parsed a full expression.
-      // Instead fail and propogate the error since caller might have something
+      // Instead fail and propagate the error since caller might have something
       // the suggest, e.g. signature help in function call. Note that this is
       // performed before pushing the \p Expr, so that signature help can report
       // current argument correctly.
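Before the ParseTentative.cpp hunk, a quick illustration of what the new string-literal argument path means for users. These examples are mine, not from the patch; they assume the P2361 rules the code above implements, under which attribute string arguments are parsed as unevaluated string literals:

    [[deprecated("use new_api() instead")]] void old_api(); // OK, as before

    [[deprecated(L"wide")]] void f();    // now rejected: encoding prefixes are
                                         // not allowed in unevaluated strings
    [[nodiscard("\x68\x69")]] int g();   // now rejected: numeric escape
                                         // sequences are not allowed either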
diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index 66433705250010cd8f08d0a2ab13505b6805052f..449be3425411d788820e040cd3f717d76d13736c 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -883,7 +883,8 @@ bool Parser::TrySkipAttributes() {
       // Note that explicitly checking for `[[` and `]]` allows to fail as
       // expected in the case of the Objective-C message send syntax.
       ConsumeBracket();
-    } else if (Tok.isRegularKeywordAttribute()) {
+    } else if (Tok.isRegularKeywordAttribute() &&
+               !doesKeywordAttributeTakeArgs(Tok.getKind())) {
       ConsumeToken();
     } else {
       ConsumeToken();
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 46ae6fba8344bb7519a82c20542e9c44139ab078..03030a02ee792d3c118d16e1d5300b1bc6666763 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2057,8 +2057,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
     if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) {
       llvm::StringMap<bool> CallerFeatureMap;
       Context.getFunctionFeatureMap(CallerFeatureMap, FD);
-      if (!Builtin::evaluateRequiredTargetFeatures(
-          "sve", CallerFeatureMap))
+      if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap) &&
+          !Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
         Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
     }
   };
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 5ee20554c4cf32fb529250ab22ee89d0da2dc179..96356f1bdca26ae9deffbfd4b61052fc99494e30 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2893,28 +2893,26 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
-bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
-  // Range check SVE intrinsics that take immediate values.
-  SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible };
 
-  switch (BuiltinID) {
-  default:
-    return false;
-#define GET_SVE_IMMEDIATE_CHECK
-#include "clang/Basic/arm_sve_sema_rangechecks.inc"
-#undef GET_SVE_IMMEDIATE_CHECK
-#define GET_SME_IMMEDIATE_CHECK
-#include "clang/Basic/arm_sme_sema_rangechecks.inc"
-#undef GET_SME_IMMEDIATE_CHECK
-  }
+enum ArmSMEState : unsigned {
+  ArmNoState = 0,
+
+  ArmInZA = 0b01,
+  ArmOutZA = 0b10,
+  ArmInOutZA = 0b11,
+  ArmZAMask = 0b11,
+};
 
+bool Sema::ParseSVEImmChecks(
+    CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
   // Perform all the immediate checks for this builtin call.
   bool HasError = false;
   for (auto &I : ImmChecks) {
     int ArgNum, CheckTy, ElementSizeInBits;
     std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
 
-    typedef bool(*OptionSetCheckFnTy)(int64_t Value);
+    typedef bool (*OptionSetCheckFnTy)(int64_t Value);
 
     // Function that checks whether the operand (ArgNum) is an immediate
     // that is one of the predefined values.
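As context for the range-check refactor just above: ParseSVEImmChecks keeps the per-argument immediate validation in one place so that CheckSVEBuiltinFunctionCall and the new CheckSMEBuiltinFunctionCall (next hunk) can share it. A hedged example of the kind of constraint it enforces, using the SVE2 svcdot intrinsic, whose rotation immediate must be 0, 90, 180 or 270 (assumes a target built with +sve2):

    #include <arm_sve.h>

    svint32_t ok(svint32_t acc, svint8_t a, svint8_t b) {
      return svcdot_s32(acc, a, b, 90);  // valid rotation immediate
    }

    svint32_t bad(svint32_t acc, svint8_t a, svint8_t b) {
      return svcdot_s32(acc, a, b, 45);  // rejected by the immediate check
    }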
@@ -3029,8 +3027,135 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return HasError; } +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) + return ArmStreaming; + if (const auto *T = FD->getType()->getAs()) { + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + ArmStreamingType FnType = getArmStreamingFnType(FD); + if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) { + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "streaming"; + } + + if (FnType == ArmStreamingCompatible && + BuiltinType != ArmStreamingCompatible) { + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "streaming compatible"; + return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasArmZAState(const FunctionDecl *FD) { + const auto *T = FD->getType()->getAs(); + return (T && FunctionType::getArmZAState(T->getAArch64SMEAttributes()) != + FunctionType::ARM_None) || + (FD->hasAttr() && FD->getAttr()->isNewZA()); +} + +static ArmSMEState getSMEState(unsigned BuiltinID) { + switch (BuiltinID) { + default: + return ArmNoState; +#define GET_SME_BUILTIN_GET_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_GET_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { + std::optional BuiltinType; + + switch (BuiltinID) { +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS + } + + if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + + if ((getSMEState(BuiltinID) & ArmZAMask) && !hasArmZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); + } + + // Range check SME intrinsics that take immediate values. + SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: + return false; +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + +bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { + std::optional BuiltinType; + + switch (BuiltinID) { +#define GET_SVE_STREAMING_ATTRS +#include "clang/Basic/arm_sve_streaming_attrs.inc" +#undef GET_SVE_STREAMING_ATTRS + } + if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + } + // Range check SVE intrinsics that take immediate values. 
+  SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+
+  switch (BuiltinID) {
+  default:
+    return false;
+#define GET_SVE_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sve_sema_rangechecks.inc"
+#undef GET_SVE_IMMEDIATE_CHECK
+  }
+
+  return ParseSVEImmChecks(TheCall, ImmChecks);
+}
+
 bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
                                         unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+
+    switch (BuiltinID) {
+    default:
+      break;
+#define GET_NEON_BUILTINS
+#define TARGET_BUILTIN(id, ...) case NEON::BI##id:
+#define BUILTIN(id, ...) case NEON::BI##id:
+#include "clang/Basic/arm_neon.inc"
+      checkArmStreamingBuiltin(*this, TheCall, FD, ArmNonStreaming);
+      break;
+#undef TARGET_BUILTIN
+#undef BUILTIN
+#undef GET_NEON_BUILTINS
+    }
+  }
+
   llvm::APSInt Result;
   uint64_t mask = 0;
   unsigned TV = 0;
@@ -3393,6 +3518,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
     if (CheckSVEBuiltinFunctionCall(BuiltinID, TheCall))
       return true;
 
+    if (CheckSMEBuiltinFunctionCall(BuiltinID, TheCall))
+      return true;
+
   // For intrinsics which take an immediate value as part of the instruction,
   // range check them here.
   unsigned i = 0, l = 0, u = 0;
@@ -7053,8 +7181,8 @@ void Sema::CheckArgAlignment(SourceLocation Loc, NamedDecl *FDecl,
 }
 
 /// Handles the checks for format strings, non-POD arguments to vararg
-/// functions, NULL arguments passed to non-NULL parameters, and diagnose_if
-/// attributes.
+/// functions, NULL arguments passed to non-NULL parameters, diagnose_if
+/// attributes and AArch64 SME attributes.
 void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
                      const Expr *ThisArg, ArrayRef<const Expr *> Args,
                      bool IsMemberFunction, SourceLocation Loc,
@@ -7135,6 +7263,41 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
             ArgTy, ParamTy);
       }
     }
+
+  // If the callee has an AArch64 SME attribute to indicate that it is an
+  // __arm_streaming function, then the caller requires SME to be available.
+  FunctionProtoType::ExtProtoInfo ExtInfo = Proto->getExtProtoInfo();
+  if (ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask) {
+    if (auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) {
+      llvm::StringMap<bool> CallerFeatureMap;
+      Context.getFunctionFeatureMap(CallerFeatureMap, CallerFD);
+      if (!CallerFeatureMap.contains("sme"))
+        Diag(Loc, diag::err_sme_call_in_non_sme_target);
+    } else if (!Context.getTargetInfo().hasFeature("sme")) {
+      Diag(Loc, diag::err_sme_call_in_non_sme_target);
+    }
+  }
+
+  // If the callee uses AArch64 SME ZA state but the caller doesn't define
+  // any, then this is an error.
+  FunctionType::ArmStateValue ArmZAState =
+      FunctionType::getArmZAState(ExtInfo.AArch64SMEAttributes);
+  if (ArmZAState != FunctionType::ARM_None) {
+    bool CallerHasZAState = false;
+    if (const auto *CallerFD = dyn_cast<FunctionDecl>(CurContext)) {
+      auto *Attr = CallerFD->getAttr<ArmNewAttr>();
+      if (Attr && Attr->isNewZA())
+        CallerHasZAState = true;
+      else if (const auto *FPT =
+                   CallerFD->getType()->getAs<FunctionProtoType>())
+        CallerHasZAState = FunctionType::getArmZAState(
+                               FPT->getExtProtoInfo().AArch64SMEAttributes) !=
+                           FunctionType::ARM_None;
+    }
+
+    if (!CallerHasZAState)
+      Diag(Loc, diag::err_sme_za_call_no_za_state);
+  }
 }
 
 if (FDecl && FDecl->hasAttr()) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 5481bbd22c66b687bfdbcf009f4151eba9232b60..f7503e1e8e02a482030274b58bf5706afc9c122a 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3758,6 +3758,15 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S,
     }
   }
 
+  // It is not permitted to redeclare an SME function with different SME
+  // attributes.
+  if (IsInvalidSMECallConversion(Old->getType(), New->getType())) {
+    Diag(New->getLocation(), diag::err_sme_attr_mismatch)
+        << New->getType() << Old->getType();
+    Diag(OldLocation, diag::note_previous_declaration);
+    return true;
+  }
+
   // If a function is first declared with a calling convention, but is later
   // declared or defined without one, all following decls assume the calling
   // convention of the first.
@@ -12081,6 +12090,35 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
     if (!Redeclaration && LangOpts.CUDA)
       checkCUDATargetOverload(NewFD, Previous);
   }
+
+  // Check if the function definition uses any AArch64 SME features without
+  // having the '+sme' feature enabled.
+  if (DeclIsDefn) {
+    const auto *Attr = NewFD->getAttr<ArmNewAttr>();
+    bool UsesSM = NewFD->hasAttr<ArmLocallyStreamingAttr>();
+    bool UsesZA = Attr && Attr->isNewZA();
+    if (const auto *FPT = NewFD->getType()->getAs<FunctionProtoType>()) {
+      FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
+      UsesSM |=
+          EPI.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask;
+      UsesZA |= FunctionType::getArmZAState(EPI.AArch64SMEAttributes) !=
+                FunctionType::ARM_None;
+    }
+
+    if (UsesSM || UsesZA) {
+      llvm::StringMap<bool> FeatureMap;
+      Context.getFunctionFeatureMap(FeatureMap, NewFD);
+      if (!FeatureMap.contains("sme")) {
+        if (UsesSM)
+          Diag(NewFD->getLocation(),
+               diag::err_sme_definition_using_sm_in_non_sme_target);
+        else
+          Diag(NewFD->getLocation(),
+               diag::err_sme_definition_using_za_in_non_sme_target);
+      }
+    }
+  }
+
   return Redeclaration;
 }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ed69e802c95dd51671c74fe417e5c8eb58431a3e..87c89299ace64dc85e44190c183895ef89453bd3 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -349,7 +349,7 @@ bool Sema::checkStringLiteralArgumentAttr(const AttributeCommonInfo &CI,
   if (ArgLocation)
     *ArgLocation = E->getBeginLoc();
 
-  if (!Literal || !Literal->isOrdinary()) {
+  if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) {
     Diag(E->getBeginLoc(), diag::err_attribute_argument_type)
         << CI << AANT_ArgumentString;
     return false;
@@ -381,6 +381,16 @@ bool Sema::checkStringLiteralArgumentAttr(const ParsedAttr &AL, unsigned ArgNum,
   // Now check for an actual string literal.
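The CheckFunctionDeclaration hunk above fires only for definitions (DeclIsDefn): declaring a streaming or ZA-using function is always permitted, but defining one requires the 'sme' target feature. A minimal sketch of the intent, not part of the patch (exact diagnostic wording may differ):

    // Compiled WITHOUT -target-feature +sme:
    void decl_only(void) __arm_streaming;  // OK: declaration emits no code
    void def_sm(void) __arm_streaming {}   // error: definition requires 'sme'
    __arm_new("za") void def_za(void) {}   // error: definition requires 'sme'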
   Expr *ArgExpr = AL.getArgAsExpr(ArgNum);
+  const auto *Literal = dyn_cast<StringLiteral>(ArgExpr->IgnoreParenCasts());
+  if (ArgLocation)
+    *ArgLocation = ArgExpr->getBeginLoc();
+
+  if (!Literal || (!Literal->isUnevaluated() && !Literal->isOrdinary())) {
+    Diag(ArgExpr->getBeginLoc(), diag::err_attribute_argument_type)
+        << AL << AANT_ArgumentString;
+    return false;
+  }
+  Str = Literal->getString();
   return checkStringLiteralArgumentAttr(AL, ArgExpr, Str, ArgLocation);
 }
@@ -5349,9 +5359,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
   case ParsedAttr::AT_AArch64SVEPcs:
     CC = CC_AArch64SVEPCS;
     break;
-  case ParsedAttr::AT_ArmStreaming:
-    CC = CC_C; // FIXME: placeholder until real SME support is added.
-    break;
   case ParsedAttr::AT_AMDGPUKernelCall:
     CC = CC_AMDGPUKernelCall;
     break;
@@ -8707,6 +8714,76 @@ static bool MustDelayAttributeArguments(const ParsedAttr &AL) {
   return false;
 }
 
+static bool checkArmNewAttrMutualExclusion(
+    Sema &S, const ParsedAttr &AL, const FunctionProtoType *FPT,
+    FunctionType::ArmStateValue CurrentState, StringRef StateName) {
+  auto CheckForIncompatibleAttr =
+      [&](FunctionType::ArmStateValue IncompatibleState,
+          StringRef IncompatibleStateName) {
+        if (CurrentState == IncompatibleState) {
+          S.Diag(AL.getLoc(), diag::err_attributes_are_not_compatible)
+              << (std::string("'__arm_new(\"") + StateName.str() + "\")'")
+              << (std::string("'") + IncompatibleStateName.str() + "(\"" +
+                  StateName.str() + "\")'")
+              << true;
+          AL.setInvalid();
+        }
+      };
+
+  CheckForIncompatibleAttr(FunctionType::ARM_In, "__arm_in");
+  CheckForIncompatibleAttr(FunctionType::ARM_Out, "__arm_out");
+  CheckForIncompatibleAttr(FunctionType::ARM_InOut, "__arm_inout");
+  CheckForIncompatibleAttr(FunctionType::ARM_Preserves, "__arm_preserves");
+  return AL.isInvalid();
+}
+
+static void handleArmNewAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (!AL.getNumArgs()) {
+    S.Diag(AL.getLoc(), diag::err_missing_arm_state) << AL;
+    AL.setInvalid();
+    return;
+  }
+
+  std::vector<StringRef> NewState;
+  if (const auto *ExistingAttr = D->getAttr<ArmNewAttr>()) {
+    for (StringRef S : ExistingAttr->newArgs())
+      NewState.push_back(S);
+  }
+
+  bool HasZA = false;
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef StateName;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, StateName, &LiteralLoc))
+      return;
+
+    if (StateName == "za")
+      HasZA = true;
+    else {
+      S.Diag(LiteralLoc, diag::err_unknown_arm_state) << StateName;
+      AL.setInvalid();
+      return;
+    }
+
+    if (std::find(NewState.begin(), NewState.end(), StateName) ==
+        NewState.end()) { // Avoid adding duplicates.
+      NewState.push_back(StateName);
+    }
+  }
+
+  if (auto *FPT = dyn_cast<FunctionProtoType>(D->getFunctionType())) {
+    FunctionType::ArmStateValue ZAState =
+        FunctionType::getArmZAState(FPT->getAArch64SMEAttributes());
+    if (HasZA && ZAState != FunctionType::ARM_None &&
+        checkArmNewAttrMutualExclusion(S, AL, FPT, ZAState, "za"))
+      return;
+  }
+
+  D->dropAttr<ArmNewAttr>();
+  D->addAttr(::new (S.Context)
+                 ArmNewAttr(S.Context, AL, NewState.data(), NewState.size()));
+}
+
 /// ProcessDeclAttribute - Apply the specific attribute to the specified decl if
 /// the attribute applies to decls.  If the attribute is a type attribute, just
 /// silently ignore it if a GNU attribute.
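handleArmNewAttr above accepts only the state string "za", de-duplicates repeated states, and defers to checkArmNewAttrMutualExclusion to reject combinations with shared-ZA type attributes. Illustrative only, not part of the patch:

    __arm_new("za") void f(void);                   // OK: fresh ZA state
    __arm_new("za") void g(void) __arm_inout("za"); // error: '__arm_new("za")'
                                                    // vs '__arm_inout("za")'
    __arm_new("tile") void h(void);                 // error: unknown state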
@@ -9462,6 +9539,14 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
     handleArmBuiltinAliasAttr(S, D, AL);
     break;
 
+  case ParsedAttr::AT_ArmLocallyStreaming:
+    handleSimpleAttribute<ArmLocallyStreamingAttr>(S, D, AL);
+    break;
+
+  case ParsedAttr::AT_ArmNew:
+    handleArmNewAttr(S, D, AL);
+    break;
+
   case ParsedAttr::AT_AcquireHandle:
     handleAcquireHandleAttr(S, D, AL);
     break;
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index b62f3c475c450c996d977f396f5640f3098f156d..a978ee0da6a0d1e2b3267944390f0b474b62e298 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17985,6 +17985,14 @@ bool Sema::CheckOverridingFunctionAttributes(const CXXMethodDecl *New,
     }
   }
 
+  // SME attributes must match when overriding a function declaration.
+  if (IsInvalidSMECallConversion(Old->getType(), New->getType())) {
+    Diag(New->getLocation(), diag::err_conflicting_overriding_attributes)
+        << New << New->getType() << Old->getType();
+    Diag(Old->getLocation(), diag::note_overridden_virtual_function);
+    return true;
+  }
+
   // Virtual overrides must have the same code_seg.
   const auto *OldCSA = Old->getAttr<CodeSegAttr>();
   const auto *NewCSA = New->getAttr<CodeSegAttr>();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3a5e302cc03a319116a623e3b2d88148655cfa79..8804076f4d11c4d22c58478662bd9aaa6d6f1528 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -9679,6 +9679,21 @@ ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
                                         ColonLoc, result, VK, OK);
 }
 
+// Check that the SME attributes for PSTATE.ZA and PSTATE.SM are compatible.
+bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType) {
+  unsigned FromAttributes = 0, ToAttributes = 0;
+  if (const auto *FromFn =
+          dyn_cast<FunctionProtoType>(Context.getCanonicalType(FromType)))
+    FromAttributes =
+        FromFn->getAArch64SMEAttributes() & FunctionType::SME_AttributeMask;
+  if (const auto *ToFn =
+          dyn_cast<FunctionProtoType>(Context.getCanonicalType(ToType)))
+    ToAttributes =
+        ToFn->getAArch64SMEAttributes() & FunctionType::SME_AttributeMask;
+
+  return FromAttributes != ToAttributes;
+}
+
 // Check if we have a conversion between incompatible cmse function pointer
 // types, that is, a conversion between a function pointer with the
 // cmse_nonsecure_call attribute and one without.
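Because IsInvalidSMECallConversion compares the masked AArch64SMEAttributes of the two canonical function types, any mismatch in either direction invalidates the conversion; checkPointerTypesForAssignment (next hunk) maps this to IncompatibleFunctionPointer. A sketch, not part of the patch:

    void callee(void) __arm_streaming;
    void (*ok)(void) __arm_streaming = callee; // OK: attributes match
    void (*bad)(void) = callee;                // incompatible function
                                               // pointer types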
@@ -9845,6 +9860,8 @@ checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType,
       return Sema::IncompatibleFunctionPointer;
     if (IsInvalidCmseNSCallConversion(S, ltrans, rtrans))
       return Sema::IncompatibleFunctionPointer;
+    if (S.IsInvalidSMECallConversion(rtrans, ltrans))
+      return Sema::IncompatibleFunctionPointer;
     return ConvTy;
   }
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 0aa691d24171f37fe0ddaa1e16921fc3e1ac8d56..61e2ada139c72e908f4814ca9ce519a4bbcab336 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -128,7 +128,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_VectorCall:                                            \
   case ParsedAttr::AT_AArch64VectorPcs:                                      \
   case ParsedAttr::AT_AArch64SVEPcs:                                         \
-  case ParsedAttr::AT_ArmStreaming:                                          \
   case ParsedAttr::AT_AMDGPUKernelCall:                                      \
   case ParsedAttr::AT_MSABI:                                                 \
   case ParsedAttr::AT_SysVABI:                                               \
@@ -143,6 +142,12 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_NoReturn:                                              \
   case ParsedAttr::AT_Regparm:                                               \
   case ParsedAttr::AT_CmseNSCall:                                            \
+  case ParsedAttr::AT_ArmStreaming:                                          \
+  case ParsedAttr::AT_ArmStreamingCompatible:                                \
+  case ParsedAttr::AT_ArmPreserves:                                          \
+  case ParsedAttr::AT_ArmIn:                                                 \
+  case ParsedAttr::AT_ArmOut:                                                \
+  case ParsedAttr::AT_ArmInOut:                                              \
   case ParsedAttr::AT_AnyX86NoCallerSavedRegisters:                          \
   case ParsedAttr::AT_AnyX86NoCfCheck:                                       \
     CALLING_CONV_ATTRS_CASELIST
@@ -7772,6 +7777,69 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
   llvm_unreachable("unexpected attribute kind!");
 }
 
+static bool checkMutualExclusion(TypeProcessingState &state,
+                                 const FunctionProtoType::ExtProtoInfo &EPI,
+                                 ParsedAttr &Attr,
+                                 AttributeCommonInfo::Kind OtherKind) {
+  auto OtherAttr = std::find_if(
+      state.getCurrentAttributes().begin(), state.getCurrentAttributes().end(),
+      [OtherKind](const ParsedAttr &A) { return A.getKind() == OtherKind; });
+  if (OtherAttr == state.getCurrentAttributes().end() || OtherAttr->isInvalid())
+    return false;
+
+  Sema &S = state.getSema();
+  S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible)
+      << *OtherAttr << Attr
+      << (OtherAttr->isRegularKeywordAttribute() ||
+          Attr.isRegularKeywordAttribute());
+  S.Diag(OtherAttr->getLoc(), diag::note_conflicting_attribute);
+  Attr.setInvalid();
+  return true;
+}
+
+static bool handleArmStateAttribute(Sema &S,
+                                    FunctionProtoType::ExtProtoInfo &EPI,
+                                    ParsedAttr &Attr,
+                                    FunctionType::ArmStateValue State) {
+  if (!Attr.getNumArgs()) {
+    S.Diag(Attr.getLoc(), diag::err_missing_arm_state) << Attr;
+    Attr.setInvalid();
+    return true;
+  }
+
+  for (unsigned I = 0; I < Attr.getNumArgs(); ++I) {
+    StringRef StateName;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(Attr, I, StateName, &LiteralLoc))
+      return true;
+
+    unsigned Shift;
+    FunctionType::ArmStateValue ExistingState;
+    if (StateName == "za") {
+      Shift = FunctionType::SME_ZAShift;
+      ExistingState = FunctionType::getArmZAState(EPI.AArch64SMEAttributes);
+    } else {
+      S.Diag(LiteralLoc, diag::err_unknown_arm_state) << StateName;
+      Attr.setInvalid();
+      return true;
+    }
+
+    // __arm_in(S), __arm_out(S), __arm_inout(S) and __arm_preserves(S)
+    // are all mutually exclusive for the same S, so check if there are
+    // conflicting attributes.
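The comment above states the rule that the code which follows enforces: for a given state string, only one of __arm_in/__arm_out/__arm_inout/__arm_preserves may apply. Illustrative only, not part of the patch:

    void f(void) __arm_in("za") __arm_out("za"); // error: conflicting
                                                 // attributes for state 'za'
    void g(void) __arm_in("za") __arm_in("za");  // accepted: same state,
                                                 // same attribute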
+    if (ExistingState != FunctionType::ARM_None && ExistingState != State) {
+      S.Diag(LiteralLoc, diag::err_conflicting_attributes_arm_state)
+          << StateName;
+      Attr.setInvalid();
+      return true;
+    }
+
+    EPI.setArmSMEAttribute(
+        (FunctionType::AArch64SMETypeAttributes)((State << Shift)));
+  }
+  return false;
+}
+
 /// Process an individual function attribute.  Returns true to
 /// indicate that the attribute was handled, false if it wasn't.
 static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
@@ -7901,6 +7969,72 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
     return true;
   }
 
+  if (attr.getKind() == ParsedAttr::AT_ArmStreaming ||
+      attr.getKind() == ParsedAttr::AT_ArmStreamingCompatible ||
+      attr.getKind() == ParsedAttr::AT_ArmPreserves ||
+      attr.getKind() == ParsedAttr::AT_ArmIn ||
+      attr.getKind() == ParsedAttr::AT_ArmOut ||
+      attr.getKind() == ParsedAttr::AT_ArmInOut) {
+    if (S.CheckAttrTarget(attr))
+      return true;
+
+    if (attr.getKind() == ParsedAttr::AT_ArmStreaming ||
+        attr.getKind() == ParsedAttr::AT_ArmStreamingCompatible)
+      if (S.CheckAttrNoArgs(attr))
+        return true;
+
+    if (!unwrapped.isFunctionType())
+      return false;
+
+    const auto *FnTy = unwrapped.get()->getAs<FunctionProtoType>();
+    if (!FnTy) {
+      // SME ACLE attributes are not supported on K&R-style unprototyped C
+      // functions.
+      S.Diag(attr.getLoc(), diag::warn_attribute_wrong_decl_type)
+          << attr << attr.isRegularKeywordAttribute()
+          << ExpectedFunctionWithProtoType;
+      attr.setInvalid();
+      return false;
+    }
+
+    FunctionProtoType::ExtProtoInfo EPI = FnTy->getExtProtoInfo();
+    switch (attr.getKind()) {
+    case ParsedAttr::AT_ArmStreaming:
+      if (checkMutualExclusion(state, EPI, attr,
+                               ParsedAttr::AT_ArmStreamingCompatible))
+        return true;
+      EPI.setArmSMEAttribute(FunctionType::SME_PStateSMEnabledMask);
+      break;
+    case ParsedAttr::AT_ArmStreamingCompatible:
+      if (checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmStreaming))
+        return true;
+      EPI.setArmSMEAttribute(FunctionType::SME_PStateSMCompatibleMask);
+      break;
+    case ParsedAttr::AT_ArmPreserves:
+      if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_Preserves))
+        return true;
+      break;
+    case ParsedAttr::AT_ArmIn:
+      if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_In))
+        return true;
+      break;
+    case ParsedAttr::AT_ArmOut:
+      if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_Out))
+        return true;
+      break;
+    case ParsedAttr::AT_ArmInOut:
+      if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_InOut))
+        return true;
+      break;
+    default:
+      llvm_unreachable("Unsupported attribute");
+    }
+
+    QualType newtype = S.Context.getFunctionType(FnTy->getReturnType(),
+                                                 FnTy->getParamTypes(), EPI);
+    type = unwrapped.wrap(S, newtype->getAs<FunctionType>());
+    return true;
+  }
+
   if (attr.getKind() == ParsedAttr::AT_NoThrow) {
     // Delay if this is not a function type.
if (!unwrapped.isFunctionType()) diff --git a/clang/test/AST/ast-dump-sme-attributes.cpp b/clang/test/AST/ast-dump-sme-attributes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..133648d90a15764a2c85511e3757014fb94eee92 --- /dev/null +++ b/clang/test/AST/ast-dump-sme-attributes.cpp @@ -0,0 +1,66 @@ +// Test without serialization: +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -std=c++2a -ast-dump -ast-dump-filter Foo %s | FileCheck -strict-whitespace %s + +// Test with serialization: +// RUN: %clang_cc1 -std=c++20 -triple aarch64 -target-feature +sme -emit-pch -o %t %s +// RUN: %clang_cc1 -x c++ -std=c++20 -triple aarch64 -target-feature +sme -include-pch %t -ast-dump-all -ast-dump-filter Foo /dev/null \ +// RUN: | sed -e "s/ //" -e "s/ imported//" \ +// RUN: | FileCheck --strict-whitespace %s + +struct Foo { +// CHECK: |-CXXRecordDecl {{.*}} implicit struct Foo +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_streaming 'void () __arm_streaming' +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_streaming_compatible 'void () __arm_streaming_compatible' +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_locally_streaming 'void ()' +// CHECK-NEXT: | `-ArmLocallyStreamingAttr +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_shared_za 'void () __arm_inout("za")' +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_new_za 'void ()' +// CHECK-NEXT: | `-ArmNewAttr {{.*}} za +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_preserves_za 'void () __arm_preserves("za")' + void f_streaming() __arm_streaming; + void f_streaming_compatible() __arm_streaming_compatible; + __arm_locally_streaming void f_locally_streaming(); + void f_shared_za() __arm_inout("za"); + __arm_new("za") void f_new_za(); + void f_preserves_za() __arm_preserves("za"); + + +// CHECK: |-CXXMethodDecl {{.*}} test_lambda 'int (int)' implicit-inline +// CHECK: `-CompoundStmt +// CHECK-NEXT: |-DeclStmt +// CHECK-NEXT: | `-VarDecl +// CHECK-NEXT: | `-LambdaExpr +// CHECK-NEXT: | |-CXXRecordDecl +// CHECK: | | |-CXXMethodDecl {{.*}} used constexpr operator() 'int (int) __arm_streaming const' inline +// CHECK: | | |-CXXConversionDecl {{.*}} implicit constexpr operator int (*)(int) __arm_streaming 'int (*() const noexcept)(int) __arm_streaming' inline +// CHECK: | | |-CXXMethodDecl {{.*}} implicit __invoke 'int (int) __arm_streaming' static inline +// CHECK: `-ReturnStmt +// CHECK: `-CXXOperatorCallExpr +// CHECK-NEXT: |-ImplicitCastExpr {{.*}} 'int (*)(int) __arm_streaming const' +// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int (int) __arm_streaming const' lvalue CXXMethod {{.*}} 'operator()' 'int (int) __arm_streaming const' + int test_lambda(int x) { + auto F = [](int x) __arm_streaming { return x; }; + return F(x); + } + +// CHECK: |-TypedefDecl {{.*}} referenced s_ptrty 'void (*)(int, int) __arm_streaming' +// CHECK-NEXT: | `-PointerType {{.*}} 'void (*)(int, int) __arm_streaming' +// CHECK-NEXT: | `-ParenType {{.*}} 'void (int, int) __arm_streaming' sugar +// CHECK-NEXT: | `-FunctionProtoType {{.*}} 'void (int, int) __arm_streaming' cdecl + typedef void (*s_ptrty) (int, int) __arm_streaming; + +// CHECK: `-CXXMethodDecl {{.*}} test_streaming_ptrty 'void (s_ptrty, int, int)' implicit-inline +// CHECK-NEXT: |-ParmVarDecl {{.*}} used f 's_ptrty':'void (*)(int, int) __arm_streaming' +// CHECK-NEXT: |-ParmVarDecl {{.*}} used x 'int' +// CHECK-NEXT: |-ParmVarDecl {{.*}} used y 'int' +// CHECK: `-CompoundStmt +// CHECK-NEXT: `-ReturnStmt +// CHECK-NEXT: `-CallExpr +// CHECK-NEXT: |-ImplicitCastExpr {{.*}} 's_ptrty':'void (*)(int, int) __arm_streaming' +// CHECK-NEXT: 
| `-DeclRefExpr {{.*}} 's_ptrty':'void (*)(int, int) __arm_streaming' lvalue ParmVar {{.*}} 'f' 's_ptrty':'void (*)(int, int) __arm_streaming' +// CHECK-NEXT: |-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' +// CHECK-NEXT: `-ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' + void test_streaming_ptrty(s_ptrty f, int x, int y) { return f(x, y); }; +}; diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index 439fb9e33f9ae15f51da654ec1f462d99065742c..8ddee560b11da4f1aec7c5026fe3a0c0a876b512 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -80,3 +80,26 @@ void test_tied_earlyclobber(void) { asm("" : "+&r"(a)); // CHECK: call i32 asm "", "=&{x1},0"(i32 %0) } + +void test_reduced_gpr_constraints(int var32, long var64) { + asm("add w0, w0, %0" : : "Uci"(var32) : "w0"); +// CHECK: [[ARG1:%.+]] = load i32, ptr +// CHECK: call void asm sideeffect "add w0, w0, $0", "@3Uci,~{w0}"(i32 [[ARG1]]) + asm("add x0, x0, %0" : : "Uci"(var64) : "x0"); +// CHECK: [[ARG1:%.+]] = load i64, ptr +// CHECK: call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i64 [[ARG1]]) + asm("add w0, w0, %0" : : "Ucj"(var32) : "w0"); +// CHECK: [[ARG2:%.+]] = load i32, ptr +// CHECK: call void asm sideeffect "add w0, w0, $0", "@3Ucj,~{w0}"(i32 [[ARG2]]) + asm("add x0, x0, %0" : : "Ucj"(var64) : "x0"); +// CHECK: [[ARG2:%.+]] = load i64, ptr +// CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]]) +} + +void test_sme_constraints(){ + asm("movt zt0[3, mul vl], z0" : : : "za"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + + asm("movt zt0[3, mul vl], z0" : : : "zt0"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() +} \ No newline at end of file diff --git a/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c b/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c new file mode 100644 index 0000000000000000000000000000000000000000..7eb74f28a1c85a73d9e003d7fedcdcb9e2cb920f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_NONE %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_COMPATIBLE %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_STREAMING %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_LOCALLY %s + +#define __ai __attribute__((always_inline)) +__ai void inlined_fn(void) {} +__ai void inlined_fn_streaming_compatible(void) __arm_streaming_compatible {} +__ai void inlined_fn_streaming(void) __arm_streaming {} +__ai __arm_locally_streaming void inlined_fn_local(void) {} + +#ifdef TEST_NONE +void caller(void) { + inlined_fn(); + inlined_fn_streaming_compatible(); + inlined_fn_streaming(); // expected-error {{always_inline function 'inlined_fn_streaming' and its caller 'caller' have mismatching streaming attributes}} + inlined_fn_local(); // expected-error {{always_inline function 'inlined_fn_local' and its caller 'caller' have mismatching streaming attributes}} +} +#endif + +#ifdef TEST_COMPATIBLE +void caller_compatible(void) __arm_streaming_compatible { + inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_compatible' have 
mismatching streaming attributes}} + inlined_fn_streaming_compatible(); + inlined_fn_streaming(); // expected-error {{always_inline function 'inlined_fn_streaming' and its caller 'caller_compatible' have mismatching streaming attributes}} + inlined_fn_local(); // expected-error {{always_inline function 'inlined_fn_local' and its caller 'caller_compatible' have mismatching streaming attributes}} +} +#endif + +#ifdef TEST_STREAMING +void caller_streaming(void) __arm_streaming { + inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_streaming' have mismatching streaming attributes}} + inlined_fn_streaming_compatible(); + inlined_fn_streaming(); + inlined_fn_local(); +} +#endif + +#ifdef TEST_LOCALLY +__arm_locally_streaming +void caller_local(void) { + inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_local' have mismatching streaming attributes}} + inlined_fn_streaming_compatible(); + inlined_fn_streaming(); + inlined_fn_local(); +} +#endif diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fdd2de11365dd4a1f5ba31694cb3369ea702df7d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp @@ -0,0 +1,303 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ +// RUN: -S -disable-O0-optnone -Werror -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg \ +// RUN: | opt -S -passes=inline \ +// RUN: | FileCheck %s + +extern "C" { + +extern int normal_callee(); + +// == FUNCTION DECLARATIONS == + +int streaming_decl(void) __arm_streaming; +int streaming_compatible_decl(void) __arm_streaming_compatible; +int shared_za_decl(void) __arm_inout("za"); +int preserves_za_decl(void) __arm_preserves("za"); +int private_za_decl(void); + +// == FUNCTION DEFINITIONS == + +// CHECK-LABEL: @streaming_caller() +// CHECK-SAME: #[[SM_ENABLED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// + int streaming_caller() __arm_streaming { + return normal_callee(); +} + +// CHECK: declare i32 @normal_callee() #[[NORMAL_DECL:[0-9]+]] + + +// CHECK-LABEL: @streaming_callee() +// CHECK-SAME: #[[SM_ENABLED]] +// CHECK: call i32 @streaming_decl() #[[SM_ENABLED_CALL:[0-9]+]] +// + int streaming_callee() __arm_streaming { + return streaming_decl(); +} + +// CHECK: declare i32 @streaming_decl() #[[SM_ENABLED_DECL:[0-9]+]] + +// CHECK-LABEL: @streaming_compatible_caller() +// CHECK-SAME: #[[SM_COMPATIBLE:[0-9]+]] +// CHECK: call i32 @normal_callee() +// + int streaming_compatible_caller() __arm_streaming_compatible { + return normal_callee(); +} + +// CHECK-LABEL: @streaming_compatible_callee() +// CHECK-SAME: #[[SM_COMPATIBLE]] +// CHECK: call i32 @streaming_compatible_decl() #[[SM_COMPATIBLE_CALL:[0-9]+]] +// + int streaming_compatible_callee() __arm_streaming_compatible { + return streaming_compatible_decl(); +} + +// CHECK: declare i32 @streaming_compatible_decl() #[[SM_COMPATIBLE_DECL:[0-9]+]] + +// CHECK-LABEL: @locally_streaming_caller() +// CHECK-SAME: #[[SM_BODY:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__arm_locally_streaming int locally_streaming_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @locally_streaming_callee() +// CHECK-SAME: #[[SM_BODY]] +// CHECK: call i32 @locally_streaming_caller() #[[SM_BODY_CALL:[0-9]+]] +// +__arm_locally_streaming int locally_streaming_callee() { + return 
locally_streaming_caller(); +} + + +// CHECK-LABEL: @shared_za_caller() +// CHECK-SAME: #[[ZA_SHARED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// + int shared_za_caller() __arm_inout("za") { + return normal_callee(); +} + +// CHECK-LABEL: @shared_za_callee() +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call i32 @shared_za_decl() #[[ZA_SHARED_CALL:[0-9]+]] +// + int shared_za_callee() __arm_inout("za") { + return shared_za_decl(); +} + +// CHECK: declare i32 @shared_za_decl() #[[ZA_SHARED_DECL:[0-9]+]] + + +// CHECK-LABEL: @preserves_za_caller() +// CHECK-SAME: #[[ZA_PRESERVED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// + int preserves_za_caller() __arm_preserves("za") { + return normal_callee(); +} + +// CHECK-LABEL: @preserves_za_callee() +// CHECK-SAME: #[[ZA_PRESERVED]] +// CHECK: call i32 @preserves_za_decl() #[[ZA_PRESERVED_CALL:[0-9]+]] +// + int preserves_za_callee() __arm_preserves("za") { + return preserves_za_decl(); +} + +// CHECK: declare i32 @preserves_za_decl() #[[ZA_PRESERVED_DECL:[0-9]+]] + + +// CHECK-LABEL: @new_za_caller() +// CHECK-SAME: #[[ZA_NEW:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__arm_new("za") int new_za_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @new_za_callee() +// CHECK-SAME: #[[ZA_NEW]] +// CHECK: call i32 @private_za_decl() +// +__arm_new("za") int new_za_callee() { + return private_za_decl(); +} + +// CHECK: declare i32 @private_za_decl() + + +// Ensure that the attributes are correctly propagated to function types +// and also to callsites. +typedef void (*s_ptrty) (int, int) __arm_streaming; +typedef void (*sc_ptrty) (int, int) __arm_streaming_compatible; +typedef void (*sz_ptrty) (int, int) __arm_inout("za"); +typedef void (*pz_ptrty) (int, int) __arm_preserves("za"); + +// CHECK-LABEL: @test_streaming_ptrty( +// CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_ENABLED_CALL]] +// +void test_streaming_ptrty(s_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_streaming_compatible_ptrty( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_COMPATIBLE_CALL]] +// +void test_streaming_compatible_ptrty(sc_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_shared_za( +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_SHARED_CALL]] +// +void test_shared_za(sz_ptrty f, int x, int y) __arm_inout("za") { return f(x, y); } +// CHECK-LABEL: @test_preserved_za( +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_PRESERVED_CALL]] +// +void test_preserved_za(pz_ptrty f, int x, int y) __arm_inout("za") { return f(x, y); } + +// CHECK-LABEL: @test_indirect_streaming_ptrty( +// CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_ENABLED_CALL]] +// +typedef s_ptrty **indirect_s_ptrty; +void test_indirect_streaming_ptrty(indirect_s_ptrty fptr, int x, int y) { return (**fptr)(x, y); } +} // extern "C" + +// +// Test that having the attribute in different places (on declaration and on type) +// both results in the attribute being applied to the type. 
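As the test comment says, the SME attributes are part of the canonical FunctionProtoType, so they travel with the type (for example through decltype) and cannot be silently dropped on redeclaration. A condensed sketch of what the CHECK lines below verify, not part of the patch:

    void f(void) __arm_streaming;
    decltype(f) g;  // g is also 'void () __arm_streaming'
    // Redeclaring f without __arm_streaming would instead be rejected by the
    // new err_sme_attr_mismatch check in MergeFunctionDecl.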
+// + +// CHECK-LABEL: @_Z24test_same_type_streamingv( +// CHECK: call void @_Z10streaming1v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z10streaming2v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z20same_type_streaming1v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z20same_type_streaming2v() #[[SM_ENABLED_CALL]] +// CHECK: ret void +// CHECK: } +// CHECK: declare void @_Z10streaming1v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z10streaming2v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z20same_type_streaming1v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z20same_type_streaming2v() #[[SM_ENABLED_DECL]] +void streaming1(void) __arm_streaming; +void streaming2() __arm_streaming; +decltype(streaming1) same_type_streaming1; +decltype(streaming2) same_type_streaming2; +void test_same_type_streaming() { + streaming1(); + streaming2(); + same_type_streaming1(); + same_type_streaming2(); +} + +// +// Test overloading; the attribute is not required for overloaded types and +// does not apply if not specified. +// + +// CHECK-LABEL: @_Z12overloadedfni( +// CHECK-SAME: #[[SM_ENABLED]] +int overloadedfn(int x) __arm_streaming { return x; } +// CHECK-LABEL: @_Z12overloadedfnf( +// CHECK-SAME: #[[NORMAL_DEF]] +// +float overloadedfn(float x) { return x; } +// CHECK-LABEL: @_Z13test_overloadi( +// CHECK-SAME: #[[NORMAL_DEF]] +// +int test_overload(int x) { return overloadedfn(x); } +// CHECK-LABEL: @_Z13test_overloadf( +// CHECK-SAME: #[[NORMAL_DEF]] +// +float test_overload(float x) { return overloadedfn(x); } + +// CHECK-LABEL: @_Z11test_lambdai( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call noundef i32 @"_ZZ11test_lambdaiENK3$_0clEi"({{.*}}) #[[SM_ENABLED_CALL]] +// +// CHECK: @"_ZZ11test_lambdaiENK3$_0clEi"( +// CHECK-SAME: #[[SM_ENABLED]] +int test_lambda(int x) { + auto F = [](int x) __arm_streaming { return x; }; + return F(x); +} + +// CHECK-LABEL: @_Z27test_template_instantiationv( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call noundef i32 @_Z15template_functyIiET_S0_(i32 noundef 12) #[[SM_ENABLED_CALL]] +// +// CHECK: @_Z15template_functyIiET_S0_( +// CHECK-SAME: #[[SM_ENABLED]] +template +Ty template_functy(Ty x) __arm_streaming { return x; } +int test_template_instantiation() { return template_functy(12); } + +// +// Test that arm_locally_streaming is inherited by future redeclarations, +// even when they don't specify the attribute. +// + +// CHECK: define {{.*}} @_Z25locally_streaming_inheritv( +// CHECK-SAME: #[[SM_BODY]] +__arm_locally_streaming void locally_streaming_inherit(); +void locally_streaming_inherit() { + streaming_decl(); +} + +// Test that the attributes are propagated properly to calls +// when using a variadic template as indirection. +__attribute__((always_inline)) +int call() { return 0; } + +template +__attribute__((always_inline)) +int call(T f, Other... 
other) __arm_inout("za") { + return f() + call(other...); +} + +// CHECK: {{.*}} @_Z22test_variadic_templatev( +// CHECK: call {{.*}} i32 @normal_callee() #[[NOUNWIND_CALL:[0-9]+]] +// CHECK-NEXT: call {{.*}} i32 @streaming_decl() #[[NOUNWIND_SM_ENABLED_CALL:[0-9]+]] +// CHECK-NEXT: call {{.*}} i32 @streaming_compatible_decl() #[[NOUNWIND_SM_COMPATIBLE_CALL:[0-9]+]] +// CHECK-NEXT: call {{.*}} i32 @shared_za_decl() #[[NOUNWIND_ZA_SHARED_CALL:[0-9]+]] +// CHECK-NEXT: call {{.*}} i32 @preserves_za_decl() #[[NOUNWIND_ZA_PRESERVED_CALL:[0-9]+]] +// CHECK-NEXT: add nsw +// CHECK-NEXT: add nsw +// CHECK-NEXT: add nsw +// CHECK-NEXT: add nsw +// CHECK-NEXT: ret +int test_variadic_template() __arm_inout("za") { + return call(normal_callee, + streaming_decl, + streaming_compatible_decl, + shared_za_decl, + preserves_za_decl); +} + +// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_new_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_ENABLED_CALL]] = { "aarch64_pstate_sm_enabled" } +// CHECK: attributes #[[SM_COMPATIBLE_CALL]] = { "aarch64_pstate_sm_compatible" } +// CHECK: attributes #[[SM_BODY_CALL]] = { "aarch64_pstate_sm_body" } +// CHECK: attributes #[[ZA_SHARED_CALL]] = { "aarch64_inout_za" } +// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_preserves_za" } +// CHECK: attributes #[[NOUNWIND_CALL]] = { nounwind } +// CHECK: attributes #[[NOUNWIND_SM_ENABLED_CALL]] = { nounwind "aarch64_pstate_sm_enabled" } +// CHECK: attributes #[[NOUNWIND_SM_COMPATIBLE_CALL]] = { nounwind 
"aarch64_pstate_sm_compatible" } +// CHECK: attributes #[[NOUNWIND_ZA_SHARED_CALL]] = { nounwind "aarch64_inout_za" } +// CHECK: attributes #[[NOUNWIND_ZA_PRESERVED_CALL]] = { nounwind "aarch64_preserves_za" } + diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c index b0855553df79f6881acddebb88a9c65bb03a7943..ce951f4423b606166a802f3ba03bc5138a2025d0 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -21,7 +21,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn); } @@ -33,7 +33,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { +void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn); } @@ -45,7 +45,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, 
svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn); } @@ -57,7 +57,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) { +void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn); } @@ -69,7 +69,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn); } @@ -81,7 +81,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { +void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn); } @@ -93,7 +93,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn); } @@ -105,6 +105,6 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) { +void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c index 2f0f97e742e3e1707e9c20b5da832e67e3bc7e1c..d15a068fa6403beb71499bcc45372798e1720e2d 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-i16i64 -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -21,7 +21,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn); } @@ -33,7 +33,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { +void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn); } @@ -45,7 +45,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn); } @@ -57,7 +57,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) { +void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn); } @@ -69,7 +69,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn); } @@ -81,7 +81,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { +void 
test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn); } @@ -93,7 +93,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn); } @@ -105,6 +105,6 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) { +void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c index b3b2499a383033244a4a73a5bcaf372977773a31..c80c8bd37e959a3619f6731da7ab7ade3b735861 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c @@ -1,9 +1,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include +#include // CHECK-C-LABEL: @test_svcntsb( // CHECK-CXX-LABEL: @_Z12test_svcntsbv( diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c index c309bde627f7df2629cd41403e6a6ffb440a9e8a..6fbbadc7fc8c6813a24b350a8e23e7cd7e79e999 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -1,15 +1,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include - -#ifdef DISABLE_SME_ATTRIBUTES -#define ARM_STREAMING_ATTR -#else -#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) -#endif +#include // CHECK-C-LABEL: @test_svld1_hor_za8( // CHECK-CXX-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKv( @@ -19,9 +13,9 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za8(0, slice_base, 0, pg, ptr); - svld1_hor_za8(0, slice_base, 15, pg, ptr); +void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_hor_za8(0, slice_base, pg, ptr); + svld1_hor_za8(0, slice_base + 15, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za16( @@ -33,9 +27,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, con // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za16(0, slice_base, 0, pg, ptr); - svld1_hor_za16(1, slice_base, 7, pg, ptr); +void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_hor_za16(0, slice_base, pg, ptr); + svld1_hor_za16(1, slice_base + 7, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za32( @@ -47,9 +41,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za32(0, slice_base, 0, pg, ptr); - svld1_hor_za32(3, slice_base, 3, pg, ptr); +void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_hor_za32(0, slice_base, pg, ptr); + svld1_hor_za32(3, slice_base + 3, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za64( @@ -61,9 +55,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za64(0, slice_base, 0, pg, ptr); - svld1_hor_za64(7, slice_base, 1, pg, ptr); +void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_hor_za64(0, slice_base, pg, ptr); + svld1_hor_za64(7, slice_base + 1, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za128( @@ -74,9 +68,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za128(0, slice_base, 0, pg, ptr); - 
svld1_hor_za128(15, slice_base, 0, pg, ptr); +void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_hor_za128(0, slice_base, pg, ptr); + svld1_hor_za128(15, slice_base, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za8( @@ -87,9 +81,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, c // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za8(0, slice_base, 0, pg, ptr); - svld1_ver_za8(0, slice_base, 15, pg, ptr); +void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_ver_za8(0, slice_base, pg, ptr); + svld1_ver_za8(0, slice_base + 15, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za16( @@ -101,9 +95,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, con // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za16(0, slice_base, 0, pg, ptr); - svld1_ver_za16(1, slice_base, 7, pg, ptr); +void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_ver_za16(0, slice_base, pg, ptr); + svld1_ver_za16(1, slice_base + 7, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za32( @@ -115,9 +109,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za32(0, slice_base, 0, pg, ptr); - svld1_ver_za32(3, slice_base, 3, pg, ptr); +void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_ver_za32(0, slice_base, pg, ptr); + svld1_ver_za32(3, slice_base + 3, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za64( @@ -129,9 +123,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za64(0, slice_base, 0, pg, ptr); - svld1_ver_za64(7, slice_base, 1, pg, ptr); +void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_ver_za64(0, slice_base, pg, ptr); + svld1_ver_za64(7, slice_base + 1, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za128( @@ -142,7 +136,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za128(0, slice_base, 0, pg, ptr); - svld1_ver_za128(15, slice_base, 0, pg, ptr); +void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { + svld1_ver_za128(0, slice_base, pg, 
ptr); + svld1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c index 6c80ef55f81895d0fe57c10a9eedbd6f878a8c40..37e0a5853e7746c6fa6f336b61ce09d5d29ef197 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -1,15 +1,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include - -#ifdef DISABLE_SME_ATTRIBUTES -#define ARM_STREAMING_ATTR -#else -#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) -#endif +#include // CHECK-C-LABEL: @test_svld1_hor_vnum_za8( // CHECK-CXX-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl( @@ -22,9 +16,9 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); + svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za16( @@ -39,9 +33,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); + svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za32( @@ -56,9 +50,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, 
vnum); - svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); + svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za64( @@ -73,9 +67,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum); + svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za128( @@ -89,9 +83,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum); + svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_hor_za8( @@ -105,9 +99,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum); + svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za16( @@ -122,9 +116,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum); + svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za32( @@ -139,9 +133,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], 
[[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum); + svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za64( @@ -156,9 +150,9 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum); + svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za128( @@ -172,7 +166,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { + svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum); + svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index 7efa8b1556857cfdf0606c61a94bee39a46dec73..bf7cffc1b0834d600530bc131d2a43670d98d913 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -1,30 +1,57 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include +#include // CHECK-C-LABEL: @test_svldr_vnum_za( // CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPKv( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
 // CHECK-NEXT: ret void
 //
-void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
-  svldr_vnum_za(slice_base, 0, ptr);
+void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
+  svldr_vnum_za(slice_base, ptr, 0);
 }
 
 // CHECK-C-LABEL: @test_svldr_vnum_za_1(
 // CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
-// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
-// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
 // CHECK-NEXT: ret void
 //
-void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
-  svldr_vnum_za(slice_base, 15, ptr);
+void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_out("za") {
+  svldr_vnum_za(slice_base, ptr, 15);
+}
+
+// CHECK-C-LABEL: @test_svldr_za(
+// CHECK-CXX-LABEL: @_Z13test_svldr_zajPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
+// CHECK-NEXT: ret void
+//
+void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
+  svldr_za(slice_base, ptr);
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_var(
+// CHECK-CXX-LABEL: @_Z22test_svldr_vnum_za_varjPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VNUM:%.*]] to i32
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0]])
+// CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_out("za") {
+  svldr_vnum_za(slice_base, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_2jPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
+// CHECK-NEXT: ret void
+//
+void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_out("za") {
+  svldr_vnum_za(slice_base, ptr, 16);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index b52aee12f9c7037f55b3af3d7bc70a3815e65935..bceb2d6f83df9d97678b1cf729142361469d4235 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -1,11 +1,11 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu
-target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -19,7 +19,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { +void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -29,7 +29,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { +void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -41,7 +41,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { +void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -53,7 +53,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { +void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -65,7 +65,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { +void 
test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -75,7 +75,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) { +void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -85,6 +85,6 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) { +void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c index 6925a450ba386fbca53ed182db94bf699e335d09..7cdeb8731d0be6ffbf3297b2718dc60ff42da5c5 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 
-target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -18,11 +18,11 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { - SME_ACLE_FUNC(svmopa_za64, _s16, _m)(1, pn, pm, zn, zm); +void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm); } // CHECK-C-LABEL: @test_svmopa_za64_u16( @@ -33,7 +33,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { +void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -42,11 +42,11 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { - SME_ACLE_FUNC(svmopa_za64, _f64, _m)(1, pn, pm, zn, zm); +void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm); } // CHECK-C-LABEL: @test_svsumopa_za64_s16( @@ -57,7 +57,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) { +void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -66,9 +66,9 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) -// CHECK-NEXT: tail 
call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) { - SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(2, pn, pm, zn, zm); +void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c index 923b6b96b4b4e0184ada7bb790971b6ad8fc6de7..51fffd42b9442765f6136cc50487e3a87d2c8f23 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -19,7 +19,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { +void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -29,7 +29,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { // CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { +void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -41,7 +41,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { +void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -53,7 +53,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { +void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -65,7 +65,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { +void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -75,7 +75,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) { +void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -85,6 +85,6 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) { +void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c index 447bca055ad42201a24d6de7572b920bf8923efb..24141cd1f20bc98287febdd80524d0d547318f94 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +sve -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -18,11 +18,11 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { - SME_ACLE_FUNC(svmops_za64, _s16, _m)(1, pn, pm, zn, zm); +void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm); } // CHECK-C-LABEL: @test_svmops_za64_u16( @@ -33,7 +33,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { +void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -42,11 +42,11 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, 
svuint16_t zn, svuint16_t zm // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { - SME_ACLE_FUNC(svmops_za64, _f64, _m)(1, pn, pm, zn, zm); +void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm); } // CHECK-C-LABEL: @test_svsumops_za64_s16( @@ -57,7 +57,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) { +void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -66,9 +66,9 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // -void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) { - SME_ACLE_FUNC(svusmops_za64, _u16, _m)(2, pn, pm, zn, zm); +void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c index 19e2b42e13f2d64c3322d36cd10ecf28bbedd0f5..2974511e4ebbb12eb3cc93344a97f4b3fcb41f67 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -1,11 +1,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve 
-S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include +#include #ifdef SME_OVERLOADED_FORMS #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -19,8 +19,8 @@ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za8_s8_1( @@ -30,8 +30,9 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_s16( @@ -41,20 +42,21 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_s16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +svint16_t test_svread_hor_za16_s16_1(svint16_t 
zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_s32( @@ -64,20 +66,21 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za32_s32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_hor_za64_s64( @@ -87,20 +90,21 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za64_s64_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_hor_za8_u8( @@ -109,8 +113,8 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, 
uint32_t slice_b // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za8_u8_1( @@ -120,8 +124,9 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_u16( @@ -131,20 +136,21 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_u16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_u32( @@ -154,20 +160,21 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: 
@test_svread_hor_za32_u32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_hor_za64_u64( @@ -177,20 +184,21 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za64_u64_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_f16( @@ -200,20 +208,21 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_f16_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 
[[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za16_bf16( @@ -223,20 +232,21 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za16_bf16_1( // CHECK-CXX-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_hor_za32_f32( @@ -246,20 +256,21 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za32_f32_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 
[[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_hor_za64_f64( @@ -269,20 +280,21 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za64_f64_1( // CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_hor_za128_s8( @@ -291,8 +303,8 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s8_1( @@ -301,8 +313,8 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s16( @@ -312,8 +324,8 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, 
uint32_t slice_bas // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s16_1( @@ -323,8 +335,8 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s32( @@ -334,8 +346,8 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s32_1( @@ -345,8 +357,8 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s64( @@ -356,8 +368,8 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_s64_1( @@ -367,8 +379,8 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // 
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u8( @@ -377,8 +389,8 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u8_1( @@ -387,8 +399,8 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u16( @@ -398,8 +410,8 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u16_1( @@ -409,8 +421,8 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u32( @@ -420,8 +432,8 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +svuint32_t 
test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u32_1( @@ -431,8 +443,8 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u64( @@ -442,8 +454,8 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_u64_1( @@ -453,8 +465,8 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f16( @@ -464,8 +476,8 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f16_1( @@ -475,8 +487,8 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, 
_f16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_bf16( @@ -486,8 +498,8 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_bf16_1( @@ -497,8 +509,8 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f32( @@ -508,8 +520,8 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f32_1( @@ -519,8 +531,8 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f64( @@ -530,8 +542,8 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_hor_za128_f64_1( @@ -541,8 +553,8 @@ svfloat64_t 
test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za8_s8( @@ -551,8 +563,8 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za8_s8_1( @@ -562,8 +574,9 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_ver_za16_s16( @@ -573,20 +586,21 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za16_s16_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_s16_1u11__SVInt16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, 
slice); } // CHECK-C-LABEL: @test_svread_ver_za32_s32( @@ -596,20 +610,21 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za32_s32_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_s32_1u11__SVInt32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_ver_za64_s64( @@ -619,20 +634,21 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za64_s64_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_s64_1u11__SVInt64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_ver_za8_u8( @@ -641,8 +657,8 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret 
[[TMP0]] // -svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za8_u8_1( @@ -652,8 +668,9 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice); } // CHECK-C-LABEL: @test_svread_ver_za16_u16( @@ -663,20 +680,21 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za16_u16_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_u16_1u12__SVUint16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_ver_za32_u32( @@ -686,20 +704,21 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za32_u32_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_u32_1u12__SVUint32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_ver_za64_u64( @@ -709,20 +728,21 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za64_u64_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_u64_1u12__SVUint64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_ver_za16_f16( @@ -732,20 +752,21 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za16_f16_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_ver_za16_bf16( @@ -755,20 +776,21 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za16_bf16_1( // CHECK-CXX-LABEL: @_Z27test_svread_ver_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 7; + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice); } // CHECK-C-LABEL: @test_svread_ver_za32_f32( @@ -778,20 +800,21 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za32_f32_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za32, 
_f32, _m)(zd, pg, 3, slice_base, 3); +svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 3; + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice); } // CHECK-C-LABEL: @test_svread_ver_za64_f64( @@ -801,20 +824,21 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za64_f64_1( // CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 1; + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice); } // CHECK-C-LABEL: @test_svread_ver_za128_s8( @@ -823,8 +847,8 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s8_1( @@ -833,8 +857,8 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s16( @@ -844,8 +868,8 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // 
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s16_1( @@ -855,8 +879,8 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s32( @@ -866,8 +890,8 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s32_1( @@ -877,8 +901,8 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s64( @@ -888,8 +912,8 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_s64_1( @@ -899,8 +923,8 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +svint64_t 
test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u8( @@ -909,8 +933,8 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u8_1( @@ -919,8 +943,8 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u16( @@ -930,8 +954,8 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u16_1( @@ -941,8 +965,8 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u32( @@ -952,8 +976,8 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base); 
} // CHECK-C-LABEL: @test_svread_ver_za128_u32_1( @@ -963,8 +987,8 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u64( @@ -974,8 +998,8 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_u64_1( @@ -985,8 +1009,8 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f16( @@ -996,8 +1020,8 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f16_1( @@ -1007,8 +1031,8 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_bf16( @@ -1018,8 +1042,8 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t 
sl // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_bf16_1( @@ -1029,8 +1053,8 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f32( @@ -1040,8 +1064,8 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f32_1( @@ -1051,8 +1075,8 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f64( @@ -1062,8 +1086,8 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base); } // CHECK-C-LABEL: @test_svread_ver_za128_f64_1( @@ -1073,6 +1097,6 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 
[[SLICE_BASE:%.*]]) // CHECK-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { - return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c index 067745f7d4a0592cad054bfaf2be086fbdd09d26..6b0dd67a16fb6f0128f31a893195a994ab196a3c 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -1,15 +1,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include - -#ifdef DISABLE_SME_ATTRIBUTES -#define ARM_STREAMING_ATTR -#else -#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) -#endif +#include // CHECK-C-LABEL: @test_svst1_hor_za8( // CHECK-CXX-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPv( @@ -19,9 +13,9 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za8(0, slice_base, 0, pg, ptr); - svst1_hor_za8(0, slice_base, 15, pg, ptr); +void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_hor_za8(0, slice_base, pg, ptr); + svst1_hor_za8(0, slice_base + 15, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za16( @@ -33,9 +27,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, voi // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za16(0, slice_base, 0, pg, ptr); - svst1_hor_za16(1, slice_base, 7, pg, ptr); +void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_hor_za16(0, slice_base, pg, ptr); + svst1_hor_za16(1, slice_base + 7, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za32( @@ -47,9 +41,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, vo // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void 
test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za32(0, slice_base, 0, pg, ptr); - svst1_hor_za32(3, slice_base, 3, pg, ptr); +void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_hor_za32(0, slice_base, pg, ptr); + svst1_hor_za32(3, slice_base + 3, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za64( @@ -61,9 +55,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, vo // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za64(0, slice_base, 0, pg, ptr); - svst1_hor_za64(7, slice_base, 1, pg, ptr); +void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_hor_za64(0, slice_base, pg, ptr); + svst1_hor_za64(7, slice_base + 1, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za128( @@ -74,9 +68,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, vo // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za128(0, slice_base, 0, pg, ptr); - svst1_hor_za128(15, slice_base, 0, pg, ptr); +void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_hor_za128(0, slice_base, pg, ptr); + svst1_hor_za128(15, slice_base, pg, ptr); } // CHECK-C-LABEL: @test_svst1_ver_za8( @@ -87,9 +81,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, v // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_ver_za8(0, slice_base, 0, pg, ptr); - svst1_ver_za8(0, slice_base, 15, pg, ptr); +void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_ver_za8(0, slice_base, pg, ptr); + svst1_ver_za8(0, slice_base + 15, pg, ptr); } // CHECK-C-LABEL: @test_svst1_ver_za16( @@ -101,9 +95,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, voi // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_ver_za16(0, slice_base, 0, pg, ptr); - svst1_ver_za16(1, slice_base, 7, pg, ptr); +void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + svst1_ver_za16(0, slice_base, pg, ptr); + svst1_ver_za16(1, slice_base + 7, pg, ptr); } // CHECK-C-LABEL: @test_svst1_ver_za32( @@ -115,9 +109,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, vo // CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) // CHECK-NEXT: ret void // -ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_ver_za32(0, slice_base, 0, pg, ptr); - svst1_ver_za32(3, slice_base, 3, pg, ptr); +void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { + 
+  svst1_ver_za32(0, slice_base, pg, ptr);
+  svst1_ver_za32(3, slice_base + 3, pg, ptr);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_za64(
@@ -129,9 +123,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, vo
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
-  svst1_ver_za64(0, slice_base, 0, pg, ptr);
-  svst1_ver_za64(7, slice_base, 1, pg, ptr);
+void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") {
+  svst1_ver_za64(0, slice_base, pg, ptr);
+  svst1_ver_za64(7, slice_base + 1, pg, ptr);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_za128(
@@ -142,7 +136,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, vo
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
-  svst1_ver_za128(0, slice_base, 0, pg, ptr);
-  svst1_ver_za128(15, slice_base, 0, pg, ptr);
+void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") {
+  svst1_ver_za128(0, slice_base, pg, ptr);
+  svst1_ver_za128(15, slice_base, pg, ptr);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
index 4af93ac38dcca7530352e9b59efc16436fc0830b..be79674ea6840c31a456c5ca7e46342f959ee585 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -1,15 +1,9 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s
 
-#include
-
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
+#include
 
 // CHECK-C-LABEL: @test_svst1_hor_vnum_za8(
 // CHECK-CXX-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl(
@@ -22,9 +16,9 @@
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
-  svst1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
+  svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_hor_vnum_za16(
@@ -39,9 +33,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
-  svst1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
+  svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_hor_vnum_za32(
@@ -56,9 +50,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
-  svst1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
+  svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_hor_vnum_za64(
@@ -73,9 +67,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
-  svst1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
+  svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_hor_vnum_za128(
@@ -89,9 +83,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
-  svst1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
+  svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_vnum_za8(
@@ -105,9 +99,9 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
-  svst1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
+  svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_vnum_za16(
@@ -122,9 +116,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
-  svst1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
+  svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_vnum_za32(
@@ -139,9 +133,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
-  svst1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
+  svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_vnum_za64(
@@ -156,9 +150,9 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
-  svst1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
+  svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
 }
 
 // CHECK-C-LABEL: @test_svst1_ver_vnum_za128(
@@ -172,7 +166,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t p
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-NEXT:    ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
-  svst1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
-  svst1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
+  svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
+  svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc07efbb8160386fbc3a3dab98e7cca232614943
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c
@@ -0,0 +1,72 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include
+
+// CHECK-LABEL: @test_in_streaming_mode(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
+// CHECK-NEXT:    [[AND_I:%.*]] = and i64 [[TMP1]], 1
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[AND_I:%.*]] = and i64 [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0
+// CPP-CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_in_streaming_mode(void) __arm_streaming_compatible {
+  return __arm_in_streaming_mode();
+}
+
+// CHECK-LABEL: @test_za_disable(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_za_disablev(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @__arm_za_disable() #[[ATTR4:[0-9]+]]
+// CPP-CHECK-NEXT:    ret void
+//
+void test_za_disable(void) __arm_streaming_compatible {
+  __arm_za_disable();
+}
+
+// CHECK-LABEL: @test_has_sme(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
+// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
+// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+// CPP-CHECK-LABEL: @_Z12test_has_smev(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0
+// CPP-CHECK-NEXT:    ret i1 [[TOBOOL_I]]
+//
+bool test_has_sme(void) __arm_streaming_compatible {
+  return __arm_has_sme();
+}
+
+// CHECK-LABEL: @test_svundef_za(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svundef_zav(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") {
+  svundef_za();
+}
+
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index 12aa298858a18eb27838c98c2ade1a3d1cc36322..caac3ea6596f3e4f13ba95f3d794e91a6606eb3a 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -1,30 +1,57 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s
 
-#include
+#include
 
 // CHECK-C-LABEL: @test_svstr_vnum_za(
 // CHECK-CXX-LABEL: @_Z18test_svstr_vnum_zajPv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
 // CHECK-NEXT:    ret void
 //
-void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
-  svstr_vnum_za(slice_base, 0, ptr);
+void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_in("za") {
+  svstr_vnum_za(slice_base, ptr, 0);
 }
 
 // CHECK-C-LABEL: @test_svstr_vnum_za_1(
 // CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
-// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], 15
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
-// CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15)
 // CHECK-NEXT:    ret void
 //
-void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
-  svstr_vnum_za(slice_base, 15, ptr);
+void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_in("za") {
+  svstr_vnum_za(slice_base, ptr, 15);
+}
+
+// CHECK-C-LABEL: @test_svstr_za(
+// CHECK-CXX-LABEL: @_Z13test_svstr_zajPv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_svstr_za(uint32_t slice_base, void *ptr) __arm_in("za") {
+  svstr_za(slice_base, ptr);
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_var(
+// CHECK-CXX-LABEL: @_Z22test_svstr_vnum_za_varjPvl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[VNUM:%.*]] to i32
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]])
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_in("za") {
+  svstr_vnum_za(slice_base, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_2(
+// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_2jPv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16)
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_in("za") {
+  svstr_vnum_za(slice_base, ptr, 16);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 66e4550294ac7ae09881d88ce2c2a547cf279e59..c76be10518c0dd9ea7254fce55688e7352e15e9b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -1,11 +1,11 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s
 
-#include
+#include
 
 #ifdef SME_OVERLOADED_FORMS
 #define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
@@ -19,8 +19,8 @@
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za8_s8_1(
@@ -30,8 +30,9 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 15, pg, zn);
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 15;
+  SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_s16(
@@ -41,8 +42,8 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_s16_1(
@@ -53,8 +54,9 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_s32(
@@ -64,8 +66,8 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_s32_1(
@@ -76,8 +78,9 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_s64(
@@ -87,8 +90,8 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_s64_1(
@@ -99,8 +102,9 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za8_u8(
@@ -109,8 +113,8 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za8_u8_1(
@@ -120,8 +124,9 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 15, pg, zn);
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 15;
+  SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_u16(
@@ -131,8 +136,8 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_u16_1(
@@ -143,8 +148,9 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_u32(
@@ -154,8 +160,8 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_u32_1(
@@ -166,8 +172,9 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_u64(
@@ -177,8 +184,8 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_u64_1(
@@ -189,8 +196,9 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_f16(
@@ -200,8 +208,8 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_f16_1(
@@ -212,8 +220,9 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_bf16(
@@ -223,8 +232,8 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za16_bf16_1(
@@ -235,8 +244,9 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_f32(
@@ -246,8 +256,8 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za32_f32_1(
@@ -258,8 +268,9 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_f64(
@@ -269,8 +280,8 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za64_f64_1(
@@ -281,8 +292,9 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s8(
@@ -291,8 +303,8 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s8_1(
@@ -301,8 +313,8 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s16(
@@ -312,8 +324,8 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s16_1(
@@ -323,8 +335,8 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s32(
@@ -334,8 +346,8 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s32_1(
@@ -345,8 +357,8 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s64(
@@ -356,8 +368,8 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_s64_1(
@@ -367,8 +379,8 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u8(
@@ -377,8 +389,8 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u8_1(
@@ -387,8 +399,8 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u16(
@@ -398,8 +410,8 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u16_1(
@@ -409,8 +421,8 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u32(
@@ -420,8 +432,8 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u32_1(
@@ -431,8 +443,8 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u64(
@@ -442,8 +454,8 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_u64_1(
@@ -453,8 +465,8 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f16(
@@ -464,8 +476,8 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f16_1(
@@ -475,8 +487,8 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_bf16(
@@ -486,8 +498,8 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_bf16_1(
@@ -497,8 +509,8 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f32(
@@ -508,8 +520,8 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f32_1(
@@ -519,8 +531,8 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f64(
@@ -530,8 +542,8 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_hor_za128_f64_1(
@@ -541,8 +553,8 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za8_s8(
@@ -551,8 +563,8 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za8_s8_1(
@@ -562,8 +574,9 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 15, pg, zn);
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 15;
+  SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_s16(
@@ -573,8 +586,8 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_s16_1(
@@ -585,8 +598,9 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_s32(
@@ -596,8 +610,8 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_s32_1(
@@ -608,8 +622,9 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_s64(
@@ -619,8 +634,8 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_s64_1(
@@ -631,8 +646,9 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za8_u8(
@@ -641,8 +657,8 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za8_u8_1(
@@ -652,8 +668,9 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 15, pg, zn);
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 15;
+  SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_u16(
@@ -663,8 +680,8 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_u16_1(
@@ -675,8 +692,9 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_u32(
@@ -686,8 +704,8 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_u32_1(
@@ -698,8 +716,9 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_u64(
@@ -709,8 +728,8 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_u64_1(
@@ -721,8 +740,9 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_f16(
@@ -732,8 +752,8 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_f16_1(
@@ -744,8 +764,9 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_bf16(
@@ -755,8 +776,8 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za16_bf16_1(
@@ -767,8 +788,9 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice_base, 7, pg, zn);
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 7;
+  SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_f32(
@@ -778,8 +800,8 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za32_f32_1(
@@ -790,8 +812,9 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice_base, 3, pg, zn);
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 3;
+  SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_f64(
@@ -801,8 +824,8 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za64_f64_1(
@@ -813,8 +836,9 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice_base, 1, pg, zn);
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") {
+  uint32_t slice = slice_base + 1;
+  SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s8(
@@ -823,8 +847,8 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s8_1(
@@ -833,8 +857,8 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s16(
@@ -844,8 +868,8 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s16_1(
@@ -855,8 +879,8 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s32(
@@ -866,8 +890,8 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s32_1(
@@ -877,8 +901,8 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
 }
 
 // CHECK-C-LABEL: @test_svwrite_ver_za128_s64(
@@ -888,8 +912,8 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]])
 // CHECK-NEXT:    ret void
 //
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
-  SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, 0, pg, zn);
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") {
+  SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
 }
 
 //
CHECK-C-LABEL: @test_svwrite_ver_za128_s64_1( @@ -899,8 +923,8 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u8( @@ -909,8 +933,8 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u8_1( @@ -919,8 +943,8 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u16( @@ -930,8 +954,8 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u16_1( @@ -941,8 +965,8 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u32( @@ -952,8 +976,8 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u32(uint32_t 
slice_base, svbool_t pg, svuint32_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u32_1( @@ -963,8 +987,8 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u64( @@ -974,8 +998,8 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_u64_1( @@ -985,8 +1009,8 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f16( @@ -996,8 +1020,8 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f16_1( @@ -1007,8 +1031,8 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn); } // 
CHECK-C-LABEL: @test_svwrite_ver_za128_bf16( @@ -1018,8 +1042,8 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_bf16_1( @@ -1029,8 +1053,8 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f32( @@ -1040,8 +1064,8 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f32_1( @@ -1051,8 +1075,8 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f64( @@ -1062,8 +1086,8 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // -void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn); } // CHECK-C-LABEL: @test_svwrite_ver_za128_f64_1( @@ -1073,6 +1097,6 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn // CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) // CHECK-NEXT: ret void // 
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { - SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, 0, pg, zn); +void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c index 3ff9f6346c49212009bd89eabd5aa7f1309b8506..72db124537bda63bcec36b235fd7ae01ea158a99 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c @@ -1,9 +1,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -o /dev/null %s -#include +#include // CHECK-C-LABEL: @test_svzero_mask_za( // CHECK-CXX-LABEL: @_Z19test_svzero_mask_zav( @@ -11,7 +11,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0) // CHECK-NEXT: ret void // -void test_svzero_mask_za() { +void test_svzero_mask_za(void) __arm_inout("za") { svzero_mask_za(0); } @@ -21,7 +21,7 @@ void test_svzero_mask_za() { // CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176) // CHECK-NEXT: ret void // -void test_svzero_mask_za_1() { +void test_svzero_mask_za_1(void) __arm_inout("za") { svzero_mask_za(176); } @@ -31,16 +31,22 @@ void test_svzero_mask_za_1() { // CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) // CHECK-NEXT: ret void // -void test_svzero_mask_za_2() { +void test_svzero_mask_za_2(void) __arm_inout("za") { svzero_mask_za(255); } -// CHECK-C-LABEL: @test_svzero_za( -// CHECK-CXX-LABEL: @_Z14test_svzero_zav( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) -// CHECK-NEXT: ret void +// CHECK-C-LABEL: define dso_local void @test_svzero_za( +// CHECK-C-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CHECK-C-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z14test_svzero_zav( +// CHECK-CXX-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CHECK-CXX-NEXT: ret void // -void test_svzero_za() { +void test_svzero_za(void) __arm_out("za") { svzero_za(); } diff --git a/clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c b/clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c index 5c1e931a727124fd03e438526221bb728d30c3f3..14a29dfac2c7bdcae683af11a9fd0c98baf0b8e2 100644 --- a/clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c +++ 
b/clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c @@ -168,6 +168,30 @@ SVBOOL_TEST_UPL(__SVInt32_t, s) ; SVBOOL_TEST_UPL(__SVInt64_t, d) ; // CHECK: call asm sideeffect "fadd $0.d, $1.d, $2.d, $3.d\0A", "=w,@3Upl,w,w"( %in1, %in2, %in3) +#define SVBOOL_TEST_UPH(DT, KIND)\ +__SVBool_t func_bool_uph_##KIND(__SVBool_t in1, DT in2, DT in3)\ +{\ + __SVBool_t out;\ + asm volatile (\ + "fadd %[out]." #KIND ", %[in1]." #KIND ", %[in2]." #KIND ", %[in3]." #KIND "\n"\ + : [out] "=w" (out)\ + : [in1] "Uph" (in1),\ + [in2] "w" (in2),\ + [in3] "w" (in3)\ + :);\ + return out;\ +} + +SVBOOL_TEST_UPH(__SVInt8_t, b) ; +// CHECK: call asm sideeffect "fadd $0.b, $1.b, $2.b, $3.b\0A", "=w,@3Uph,w,w"( %in1, %in2, %in3) +SVBOOL_TEST_UPH(__SVInt16_t, h) ; +// CHECK: call asm sideeffect "fadd $0.h, $1.h, $2.h, $3.h\0A", "=w,@3Uph,w,w"( %in1, %in2, %in3) +SVBOOL_TEST_UPH(__SVInt32_t, s) ; +// CHECK: call asm sideeffect "fadd $0.s, $1.s, $2.s, $3.s\0A", "=w,@3Uph,w,w"( %in1, %in2, %in3) +SVBOOL_TEST_UPH(__SVInt64_t, d) ; +// CHECK: call asm sideeffect "fadd $0.d, $1.d, $2.d, $3.d\0A", "=w,@3Uph,w,w"( %in1, %in2, %in3) + + #define SVFLOAT_TEST(DT,KIND)\ DT func_float_##DT##KIND(DT inout1, DT in2)\ {\ diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c index 2605d574b830440a267a0f347bd09550a998785d..dec7e1297ebf1dfe4f13eff12c638fafda95d91c 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate2_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -27,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) +svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c index 08e4280064a6b4047c3fa443a92b6adaa633a51d..65d4e61184858c1680ef8919c37f7473104da7a9 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c +++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -27,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) +svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s8,,)(x0, x1); } @@ -44,7 +51,7 @@ svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) +svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s16,,)(x0, x1); } @@ -61,7 +68,7 @@ svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) +svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s32,,)(x0, x1); } @@ -78,7 +85,7 @@ svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) +svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s64,,)(x0, x1); } @@ -95,7 +102,7 @@ svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) +svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u8,,)(x0, x1); } @@ -112,7 +119,7 @@ svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) +svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR { return 
SVE_ACLE_FUNC(svcreate2,_u16,,)(x0, x1); } @@ -129,7 +136,7 @@ svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) +svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u32,,)(x0, x1); } @@ -146,7 +153,7 @@ svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) +svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u64,,)(x0, x1); } @@ -163,7 +170,7 @@ svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) +svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f16,,)(x0, x1); } @@ -180,7 +187,7 @@ svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) +svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f32,,)(x0, x1); } @@ -197,7 +204,7 @@ svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) +svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f64,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c index 760708cfb7a052069e9601a7a78dd1ec82711254..d8c22cfb88a003e406747f607168fe84a2c7e5dd 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else 
+#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate3_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -29,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) +svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c index 325997ab6f91b7c39c12e1cd099cb1649d567b6b..9f485d6d3a6cfca2b46a5647506cae63d723adbc 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate3_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -29,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) +svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s8,,)(x0, x1, x2); } @@ -48,7 +55,7 @@ svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) +svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s16,,)(x0, x1, x2); } @@ -67,7 +74,7 @@ svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) +svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s32,,)(x0, x1, x2); } @@ -86,7 +93,7 @@ svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) +svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s64,,)(x0, x1, x2); } @@ -105,7 +112,7 @@ svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) +svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u8,,)(x0, x1, x2); } @@ -124,7 +131,7 @@ svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) +svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u16,,)(x0, x1, x2); } @@ -143,7 +150,7 @@ svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) +svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u32,,)(x0, x1, x2); } @@ -162,7 +169,7 @@ svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) +svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u64,,)(x0, x1, x2); } @@ -181,7 +188,7 @@ svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) +svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f16,,)(x0, x1, x2); } @@ -200,7 +207,7 @@ svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) +svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f32,,)(x0, x1, x2); } @@ -219,7 +226,7 @@ svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat64x3_t test_svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2) +svfloat64x3_t test_svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f64,,)(x0, x1, x2); } diff --git 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c index e3af56c5d1339fe209707e73b8c376f999d71e8e..ca90a435af8c3a1f03c525536e7c028bf59ca92e 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate4_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -31,7 +38,7 @@ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) +svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c index 7e0c2a7f00d1d003dbd0bf62333729a906059c14..de8a5b06165735cf9cd64210a854c95002b626f3 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate4_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -31,7 +38,7 @@ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[X4:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) +svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s8,,)(x0, x1, x2, x4); } @@ -52,7 +59,7 @@ svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x4) +svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s16,,)(x0, x1, x2, x4); } @@ -73,7 +80,7 @@ svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x4) +svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s32,,)(x0, x1, x2, x4); } @@ -94,7 +101,7 @@ svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64_t x4) +svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s64,,)(x0, x1, x2, x4); } @@ -115,7 +122,7 @@ svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[X4:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x4) +svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u8,,)(x0, x1, x2, x4); } @@ -136,7 +143,7 @@ svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x4) +svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u16,,)(x0, x1, x2, x4); } @@ -157,7 +164,7 @@ svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x4) +svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u32,,)(x0, x1, x2, x4); } @@ -178,7 +185,7 @@ svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x4) +svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u64,,)(x0, x1, x2, x4); } @@ -199,7 +206,7 @@ svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x4) +svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f16,,)(x0, x1, x2, x4); } @@ -220,7 +227,7 @@ svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x4) +svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f32,,)(x0, x1, x2, x4); } @@ -241,7 +248,7 @@ svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat64x4_t test_svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x4) +svfloat64x4_t test_svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f64,,)(x0, x1, x2, x4); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c index 25dc49a4c2bd3b6487a955988172fdd178724b6f..b9c46b2261f5d13132428daeab2f92cdf1e944db 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget2_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) +svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) +svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c index 32a84c91b74d4c6b3b64775d43b95f9e0847c47d..8cd887aaff407d58e4fb957d143597a6d61934ef 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +15,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) @@ -24,7 +31,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget2_s8(svint8x2_t tuple) +svint8_t test_svget2_s8(svint8x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s8,,)(tuple, 0); } @@ -39,7 +46,7 @@ svint8_t test_svget2_s8(svint8x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget2_s16(svint16x2_t tuple) +svint16_t test_svget2_s16(svint16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s16,,)(tuple, 1); } @@ -54,7 +61,7 @@ svint16_t test_svget2_s16(svint16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svget2_s32(svint32x2_t tuple) +svint32_t test_svget2_s32(svint32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s32,,)(tuple, 0); } @@ -69,7 +76,7 @@ svint32_t test_svget2_s32(svint32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget2_s64(svint64x2_t tuple) +svint64_t test_svget2_s64(svint64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s64,,)(tuple, 1); } @@ -84,7 +91,7 @@ svint64_t test_svget2_s64(svint64x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget2_u8(svuint8x2_t tuple) +svuint8_t test_svget2_u8(svuint8x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u8,,)(tuple, 0); } @@ -99,7 +106,7 @@ svuint8_t test_svget2_u8(svuint8x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget2_u16(svuint16x2_t tuple) +svuint16_t test_svget2_u16(svuint16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u16,,)(tuple, 1); } @@ -114,7 +121,7 @@ svuint16_t test_svget2_u16(svuint16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget2_u32(svuint32x2_t tuple) +svuint32_t test_svget2_u32(svuint32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u32,,)(tuple, 0); } @@ -129,7 +136,7 @@ svuint32_t test_svget2_u32(svuint32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget2_u64(svuint64x2_t tuple) +svuint64_t test_svget2_u64(svuint64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u64,,)(tuple, 1); } @@ -144,7 +151,7 @@ svuint64_t test_svget2_u64(svuint64x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svget2_f16(svfloat16x2_t tuple) +svfloat16_t test_svget2_f16(svfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f16,,)(tuple, 0); } @@ -159,7 +166,7 @@ svfloat16_t test_svget2_f16(svfloat16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svget2_f32(svfloat32x2_t tuple) +svfloat32_t test_svget2_f32(svfloat32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f32,,)(tuple, 1); } @@ -174,7 +181,7 @@ svfloat32_t test_svget2_f32(svfloat32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64_t test_svget2_f64(svfloat64x2_t tuple) +svfloat64_t test_svget2_f64(svfloat64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f64,,)(tuple, 0); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c index 47ce6bd19244e677c0f6b3d47ba29252c7f15257..7a991bc7431d036705cfd0d5298aaac4a7a99cb2 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget3_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1); } @@ -55,7 +62,7 @@ svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c index 54847152dee7c5161a01c44622e3c572128221dd..de7c3c303ffcb83b8439964a42b2aad85ae8df23 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c @@ -5,6 +5,8 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget3_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) @@ -24,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget3_s8(svint8x3_t tuple) +svint8_t test_svget3_s8(svint8x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s8,,)(tuple, 0); } @@ -39,7 +47,7 @@ svint8_t test_svget3_s8(svint8x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget3_s16(svint16x3_t tuple) 
+svint16_t test_svget3_s16(svint16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s16,,)(tuple, 2); } @@ -54,7 +62,7 @@ svint16_t test_svget3_s16(svint16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svget3_s32(svint32x3_t tuple) +svint32_t test_svget3_s32(svint32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s32,,)(tuple, 1); } @@ -69,7 +77,7 @@ svint32_t test_svget3_s32(svint32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget3_s64(svint64x3_t tuple) +svint64_t test_svget3_s64(svint64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s64,,)(tuple, 0); } @@ -84,7 +92,7 @@ svint64_t test_svget3_s64(svint64x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget3_u8(svuint8x3_t tuple) +svuint8_t test_svget3_u8(svuint8x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u8,,)(tuple, 2); } @@ -99,7 +107,7 @@ svuint8_t test_svget3_u8(svuint8x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget3_u16(svuint16x3_t tuple) +svuint16_t test_svget3_u16(svuint16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u16,,)(tuple, 1); } @@ -114,7 +122,7 @@ svuint16_t test_svget3_u16(svuint16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget3_u32(svuint32x3_t tuple) +svuint32_t test_svget3_u32(svuint32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u32,,)(tuple, 0); } @@ -129,7 +137,7 @@ svuint32_t test_svget3_u32(svuint32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget3_u64(svuint64x3_t tuple) +svuint64_t test_svget3_u64(svuint64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u64,,)(tuple, 2); } @@ -144,7 +152,7 @@ svuint64_t test_svget3_u64(svuint64x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svget3_f16(svfloat16x3_t tuple) +svfloat16_t test_svget3_f16(svfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f16,,)(tuple, 1); } @@ -159,7 +167,7 @@ svfloat16_t test_svget3_f16(svfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svget3_f32(svfloat32x3_t tuple) +svfloat32_t test_svget3_f32(svfloat32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f32,,)(tuple, 0); } @@ -174,7 +182,7 @@ svfloat32_t test_svget3_f32(svfloat32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64_t test_svget3_f64(svfloat64x3_t tuple) +svfloat64_t test_svget3_f64(svfloat64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f64,,)(tuple, 2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c index 454b3bf38bd318ccbe85087c9c16f7803a8478fd..3a5e282bfdfa36c4b6a89fe65097ffcc9175daef 
100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget4_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 1); } @@ -55,7 +62,7 @@ svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2); } @@ -70,7 +77,7 @@ svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 3); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c index 13f8c2a2906ef786585076163727b7b730bc01c9..9b4f9e5332a57228359b44009b9a16189ba9523b 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +15,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svget4_s8( @@ -26,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget4_s8(svint8x4_t tuple) +svint8_t test_svget4_s8(svint8x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s8,,)(tuple, 0); } @@ -41,7 +48,7 @@ svint8_t test_svget4_s8(svint8x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget4_s16(svint16x4_t tuple) +svint16_t test_svget4_s16(svint16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s16,,)(tuple, 2); } @@ -56,7 +63,7 @@ svint16_t test_svget4_s16(svint16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svget4_s32(svint32x4_t tuple) +svint32_t test_svget4_s32(svint32x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s32,,)(tuple, 2); } @@ -71,7 +78,7 @@ svint32_t test_svget4_s32(svint32x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[TUPLE:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget4_s64(svint64x4_t tuple) +svint64_t test_svget4_s64(svint64x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s64,,)(tuple, 3); } @@ -86,7 +93,7 @@ svint64_t test_svget4_s64(svint64x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget4_u8(svuint8x4_t tuple) +svuint8_t test_svget4_u8(svuint8x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u8,,)(tuple, 2); } @@ -101,7 +108,7 @@ svuint8_t test_svget4_u8(svuint8x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget4_u16(svuint16x4_t tuple) +svuint16_t test_svget4_u16(svuint16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u16,,)(tuple, 3); } @@ -116,7 +123,7 @@ svuint16_t test_svget4_u16(svuint16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget4_u32(svuint32x4_t tuple) +svuint32_t test_svget4_u32(svuint32x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u32,,)(tuple, 0); } @@ -131,7 +138,7 @@ svuint32_t test_svget4_u32(svuint32x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[TUPLE:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget4_u64(svuint64x4_t tuple) +svuint64_t test_svget4_u64(svuint64x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u64,,)(tuple, 3); } @@ -146,7 +153,7 @@ 
svuint64_t test_svget4_u64(svuint64x4_t tuple)
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[TUPLE:%.*]], i64 16)
 // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svget4_f16(svfloat16x4_t tuple)
+svfloat16_t test_svget4_f16(svfloat16x4_t tuple) ATTR
 {
   return SVE_ACLE_FUNC(svget4,_f16,,)(tuple, 2);
 }
@@ -161,7 +168,7 @@ svfloat16_t test_svget4_f16(svfloat16x4_t tuple)
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[TUPLE:%.*]], i64 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svget4_f32(svfloat32x4_t tuple)
+svfloat32_t test_svget4_f32(svfloat32x4_t tuple) ATTR
 {
   return SVE_ACLE_FUNC(svget4,_f32,,)(tuple, 0);
 }
@@ -176,7 +183,7 @@ svfloat32_t test_svget4_f32(svfloat32x4_t tuple)
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[TUPLE:%.*]], i64 4)
 // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svget4_f64(svfloat64x4_t tuple)
+svfloat64_t test_svget4_f64(svfloat64x4_t tuple) ATTR
 {
   return SVE_ACLE_FUNC(svget4,_f64,,)(tuple, 2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret_from_streaming_mode.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret_from_streaming_mode.c
new file mode 100644
index 0000000000000000000000000000000000000000..b430a0ba3cc75a3f3dc8392c730df33b97fdda88
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret_from_streaming_mode.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -O1 -Werror -Wall -o /dev/null %s
+
+// Note: We need to run this test with '-O1' because oddly enough the svreinterpret is always inlined at -O0.
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// Test that svreinterpret is inlined (because it should be streaming-compatible)
+__attribute__((target("sme")))
+// CHECK-LABEL: @test_svreinterpret_s16_s8_from_streaming_mode(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i8> [[OP:%.*]] to <vscale x 8 x i16>
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z45test_svreinterpret_s16_s8_from_streaming_modeu10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i8> [[OP:%.*]] to <vscale x 8 x i16>
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svreinterpret_s16_s8_from_streaming_mode(svint8_t op) __arm_streaming {
+  return SVE_ACLE_FUNC(svreinterpret_s16,_s8,,)(op);
+}
+
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
index f6217977d26ab916490e32553dd3c9059987b319..a92e5aedb59fd1fc904257f36e8e8478ee9f60ff 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
@@ -3,6 +3,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // REQUIRES: aarch64-registered-target
@@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
+#ifndef TEST_SME
+#define ATTR
+#else
+#define ATTR __arm_streaming
+#endif
+
 // CHECK-LABEL: @test_svset2_bf16_0(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 0)
@@ -25,7 +32,7 @@
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 0)
 // CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
 //
-svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x)
+svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR
 {
   return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 0, x);
 }
@@ -40,7 +47,7 @@ svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x)
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 8)
 // CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
 //
-svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x)
+svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) ATTR
 {
   return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
index 9ae3011549207cedb993108aaf5f624007eae085..b2bf4ad08aa9ebeba44233b588c566ffa20168fa 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
+++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svset2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) +svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s8,,)(tuple, 1, x); } @@ -40,7 +47,7 @@ svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) +svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s16,,)(tuple, 0, x); } @@ -55,7 +62,7 @@ svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) +svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s32,,)(tuple, 1, x); } @@ -70,7 +77,7 @@ svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) +svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s64,,)(tuple, 0, x); } @@ -85,7 +92,7 @@ svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) +svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u8,,)(tuple, 1, x); } @@ -100,7 +107,7 @@ svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) +svuint16x2_t test_svset2_u16(svuint16x2_t tuple, 
svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u16,,)(tuple, 0, x); } @@ -115,7 +122,7 @@ svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) +svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u32,,)(tuple, 1, x); } @@ -130,7 +137,7 @@ svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) +svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u64,,)(tuple, 0, x); } @@ -145,7 +152,7 @@ svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) +svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f16,,)(tuple, 1, x); } @@ -160,7 +167,7 @@ svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) +svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f32,,)(tuple, 0, x); } @@ -175,7 +182,7 @@ svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x2_t test_svset2_f64(svfloat64x2_t tuple, svfloat64_t x) +svfloat64x2_t test_svset2_f64(svfloat64x2_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f64,,)(tuple, 1, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c index aeef3ab59ee0dd4b96a861094394d8957e17dfc4..3bc8698ef89b7dce2b7394835428d440adccf17a 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define 
SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset3_bf16_0( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x); } @@ -41,7 +47,7 @@ svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 1, x); } @@ -56,7 +62,7 @@ svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 2, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c index 1b9191cc8a330880d2b18f1df9e39059ceef432f..9d10e6afca93532172914b0fb8c647e4b3dd9713 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct. 
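The TEST_SME/ATTR scaffolding that each of these test files gains reduces to the sketch below. This is an illustration only: get_first is a hypothetical reduced test, while the macros, the extra -DTEST_SME RUN line, and the __arm_streaming attribute are taken from the hunks themselves. Building the same source once without and once with -DTEST_SME exercises every intrinsic in both a normal and a streaming function; the streaming configuration only needs to compile cleanly (-o /dev/null), so no new FileCheck expectations are required.

// Minimal sketch of the TEST_SME/ATTR pattern (assumes an SME-capable
// toolchain; get_first is a made-up reduced test, not part of this patch).
#include <arm_sve.h>

#ifndef TEST_SME
#define ATTR                    // plain SVE build: no extra attribute
#else
#define ATTR __arm_streaming    // SME build: test functions become streaming
#endif

// With -DTEST_SME this declares a streaming function, so the compiler must
// accept the svget3 builtin under '-target-feature +sme' for the RUN line
// to succeed.
svint8_t get_first(svint8x3_t tuple) ATTR {
  return svget3_s8(tuple, 0); // extract vector 0 from the x3 tuple
}
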
@@ -28,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) +svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s8,,)(tuple, 1, x); } @@ -43,7 +49,7 @@ svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) +svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s16,,)(tuple, 2, x); } @@ -58,7 +64,7 @@ svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) +svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s32,,)(tuple, 0, x); } @@ -73,7 +79,7 @@ svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) +svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s64,,)(tuple, 1, x); } @@ -88,7 +94,7 @@ svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) +svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u8,,)(tuple, 2, x); } @@ -103,7 +109,7 @@ svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) +svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u16,,)(tuple, 0, x); } @@ -118,7 +124,7 @@ svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) +svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u32,,)(tuple, 1, x); } @@ -133,7 +139,7 @@ svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) +svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u64,,)(tuple, 2, x); } @@ -148,7 +154,7 @@ svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) +svfloat16x3_t 
test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f16,,)(tuple, 0, x); } @@ -163,7 +169,7 @@ svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) +svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f32,,)(tuple, 1, x); } @@ -178,7 +184,7 @@ svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x3_t test_svset3_f64(svfloat64x3_t tuple, svfloat64_t x) +svfloat64x3_t test_svset3_f64(svfloat64x3_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f64,,)(tuple, 2, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c index 013f591e8d72fc30e7c8a4d0091dbc24729639e9..31ca805dafc741bc1f2e0b584907603310d783c1 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset4_bf16_0( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 0, x); } @@ -41,7 +47,7 @@ svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x); } @@ -56,7 +62,7 @@ svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] 
// -svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 2, x); } @@ -71,7 +77,7 @@ svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 3, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c index e4ece8c2a65ff7f5b5ab67e3fb10709eb0a8fa15..ce35bfb83c88d941b45f263f80482886c20e19a9 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset4_s8( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) +svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s8,,)(tuple, 1, x); } @@ -41,7 +47,7 @@ svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) +svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s16,,)(tuple, 3, x); } @@ -56,7 +62,7 @@ svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) +svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s32,,)(tuple, 1, x); } @@ -71,7 +77,7 @@ svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) +svint64x4_t 
test_svset4_s64(svint64x4_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s64,,)(tuple, 1, x); } @@ -86,7 +92,7 @@ svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) +svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u8,,)(tuple, 3, x); } @@ -101,7 +107,7 @@ svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) +svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u16,,)(tuple, 1, x); } @@ -116,7 +122,7 @@ svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) +svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u32,,)(tuple, 1, x); } @@ -131,7 +137,7 @@ svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) +svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u64,,)(tuple, 3, x); } @@ -146,7 +152,7 @@ svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) +svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f16,,)(tuple, 1, x); } @@ -161,7 +167,7 @@ svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) +svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f32,,)(tuple, 1, x); } @@ -176,7 +182,7 @@ svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x4_t test_svset4_f64(svfloat64x4_t tuple, svfloat64_t x) +svfloat64x4_t test_svset4_f64(svfloat64x4_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f64,,)(tuple, 3, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c index fcfa7eb5768363c7667b740c699001f412644623..a9a285bdbfb8d6349cf00207355f6d9b2f581cff 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c @@ -16,18 +16,18 @@ #endif // CHECK-LABEL: define {{[^@]+}}@test_svst2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) @@ -37,20 +37,20 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c index 3235a8b9e11155bf248240bb782fe3611049e1b3..dece2ca82c53955377509c9262e406761ade2558 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c @@ -35,18 +35,18 @@ void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s16u10__SVBool_tPs11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) @@ -56,18 +56,18 @@ void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( 
[[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s32u10__SVBool_tPi11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) @@ -77,18 +77,18 @@ void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_s64u10__SVBool_tPl11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data) @@ -117,18 +117,18 @@ void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u16u10__SVBool_tPt12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) @@ -138,18 +138,18 @@ void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u32u10__SVBool_tPj12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) @@ -159,18 +159,18 @@ void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_u64u10__SVBool_tPm12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) @@ -180,18 +180,18 @@ void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f16u10__SVBool_tPDh13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) @@ -201,18 +201,18 @@ void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f32u10__SVBool_tPf13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) @@ -222,18 +222,18 @@ void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_f64( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst2_f64u10__SVBool_tPd13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP1]], [[TMP2]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) @@ -243,18 +243,18 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst2_vnum_s8u10__SVBool_tPal10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data) @@ -264,20 +264,20 @@ void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s16u10__SVBool_tPsl11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data) @@ -287,20 +287,20 @@ void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t d // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s32u10__SVBool_tPil11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data) @@ -310,20 +310,20 @@ void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t d // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_s64u10__SVBool_tPll11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data) @@ -333,18 +333,18 @@ void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t d // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst2_vnum_u8u10__SVBool_tPhl11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data) @@ -354,20 +354,20 @@ void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t da // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], 
i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u16u10__SVBool_tPtl12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data) @@ -377,20 +377,20 @@ void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u32u10__SVBool_tPjl12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr 
[[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data) @@ -400,20 +400,20 @@ void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_u64u10__SVBool_tPml12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data) @@ -423,20 +423,20 @@ void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f16u10__SVBool_tPDhl13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data) @@ -446,20 +446,20 @@ void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2 // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f32u10__SVBool_tPfl13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data) @@ -469,20 +469,20 @@ void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2 // CHECK-LABEL: define {{[^@]+}}@test_svst2_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst2_vnum_f64u10__SVBool_tPdl13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c index 096699191e88492f5f9fac6a67f23f7bcd9fea41..ffd24a45b94e5dc928b648d1dc5b819b0a93af9e 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c @@ -17,20 +17,20 @@ // CHECK-LABEL: define {{[^@]+}}@test_svst3_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) @@ -40,22 +40,22 @@ void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
define {{[^@]+}}@_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c index a962f0734dc9a2625ef050259dc25b980df91874..d52ff0b6db8858a5d74ed84cf49b9cacb5a58684 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c @@ -37,20 +37,20 @@ void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s16u10__SVBool_tPs11svint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], 
i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) @@ -60,20 +60,20 @@ void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s32u10__SVBool_tPi11svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) @@ -83,20 +83,20 @@ void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_s64u10__SVBool_tPl11svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data) @@ -127,20 +127,20 @@ void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u16u10__SVBool_tPt12svuint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) @@ -150,20 +150,20 @@ void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u32u10__SVBool_tPj12svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // 
void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) @@ -173,20 +173,20 @@ void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_u64u10__SVBool_tPm12svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) @@ -196,20 +196,20 @@ void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = 
tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f16u10__SVBool_tPDh13svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) @@ -219,20 +219,20 @@ void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f32u10__SVBool_tPf13svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( 
[[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) @@ -242,20 +242,20 @@ void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst3_f64u10__SVBool_tPd13svfloat64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) @@ -265,20 +265,20 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP1]], 
[[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst3_vnum_s8u10__SVBool_tPal10svint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data) @@ -288,22 +288,22 @@ void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s16u10__SVBool_tPsl11svint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data) @@ -313,22 +313,22 @@ void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t d // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s32u10__SVBool_tPil11svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 
8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data) @@ -338,22 +338,22 @@ void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t d // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_s64u10__SVBool_tPll11svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data) @@ -363,20 +363,20 @@ void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t d // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst3_vnum_u8u10__SVBool_tPhl11svuint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data) @@ -386,22 +386,22 @@ void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t da // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // 
CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u16u10__SVBool_tPtl12svuint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data) @@ -411,22 +411,22 @@ void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u32u10__SVBool_tPjl12svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st3.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data) @@ -436,22 +436,22 @@ void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_u64u10__SVBool_tPml12svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // 
CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data) @@ -461,22 +461,22 @@ void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f16u10__SVBool_tPDhl13svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data) @@ -486,22 +486,22 @@ void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3 // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f32u10__SVBool_tPfl13svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data) @@ -511,22 +511,22 @@ void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3 // CHECK-LABEL: define {{[^@]+}}@test_svst3_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call 
void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst3_vnum_f64u10__SVBool_tPdl13svfloat64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c index 484fc6333b577d98b0e403746181cc2fcbbfd9b5..80efdf9df65c92edc1439ee9be62782b5d56dd96 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c @@ -17,22 +17,22 @@ // CHECK-LABEL: define {{[^@]+}}@test_svst4_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) @@ -42,24 +42,24 @@ void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c index 774460c9151926a77161c87599aa1afe6f41e976..c61e78a941b228d304bf97fe1886fa797f6c9552 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c @@ -39,22 +39,22 @@ void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s16u10__SVBool_tPs11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], 
i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) @@ -64,22 +64,22 @@ void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s32u10__SVBool_tPi11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
@@ -89,22 +89,22 @@ void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
 // CHECK-LABEL: define {{[^@]+}}@test_svst4_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6)
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_s64u10__SVBool_tPl11svint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
@@ -137,22 +137,22 @@ void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
 // CHECK-LABEL: define {{[^@]+}}@test_svst4_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24)
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u16u10__SVBool_tPt12svuint16x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
@@ -162,22 +162,22 @@ void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
 // CHECK-LABEL: define {{[^@]+}}@test_svst4_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]],
i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u32u10__SVBool_tPj12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) @@ -187,22 +187,22 @@ void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_u64u10__SVBool_tPm12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) @@ -212,22 +212,22 @@ void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f16u10__SVBool_tPDh13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) @@ -237,22 +237,22 @@ void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f32u10__SVBool_tPf13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr 
[[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) @@ -262,22 +262,22 @@ void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z14test_svst4_f64u10__SVBool_tPd13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) @@ -287,22 +287,22 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst4_vnum_s8u10__SVBool_tPal10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data) @@ -312,24 +312,24 @@ void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], 
i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_s16u10__SVBool_tPsl11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data) @@ -339,24 +339,24 @@ void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t d // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr 
[[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_s32u10__SVBool_tPil11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data) @@ -366,24 +366,24 @@ void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t d // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define 
{{[^@]+}}@_Z19test_svst4_vnum_s64u10__SVBool_tPll11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data) @@ -393,22 +393,22 @@ void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t d // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[PG:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z18test_svst4_vnum_u8u10__SVBool_tPhl11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( 
[[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[PG:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data) @@ -418,24 +418,24 @@ void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t da // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u16u10__SVBool_tPtl12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data) @@ -445,24 +445,24 @@ void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u32u10__SVBool_tPjl12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data) @@ -472,24 +472,24 @@ void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_u64u10__SVBool_tPml12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call 
void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data) @@ -499,24 +499,24 @@ void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f16u10__SVBool_tPDhl13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data) @@ -526,24 +526,24 @@ void 
test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4 // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f32u10__SVBool_tPfl13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data) @@ -553,24 +553,24 @@ void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4 // CHECK-LABEL: define {{[^@]+}}@test_svst4_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) 
-// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define {{[^@]+}}@_Z19test_svst4_vnum_f64u10__SVBool_tPdl13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data) diff --git a/clang/test/Modules/aarch64-sme-keywords.cppm b/clang/test/Modules/aarch64-sme-keywords.cppm new file mode 100644 index 0000000000000000000000000000000000000000..759701a633cebed2b242ffe4a44f60a3c50af1da --- /dev/null +++ b/clang/test/Modules/aarch64-sme-keywords.cppm @@ -0,0 +1,65 @@ +// REQUIRES: aarch64-registered-target +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple aarch64 -target-feature +sme %t/A.cppm -emit-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -triple aarch64 
-target-feature +sme -fprebuilt-module-path=%t -I%t %t/Use.cpp -emit-llvm +// RUN: cat %t/Use.ll | FileCheck %s + +//--- A.cppm +module; +export module A; + +export void f_streaming(void) __arm_streaming { } +export void f_streaming_compatible(void) __arm_streaming_compatible { } +export void f_shared_za(void) __arm_inout("za") { } +export void f_preserves_za(void) __arm_preserves("za") { } + +//--- Use.cpp +// expected-no-diagnostics +import A; + +// CHECK: define dso_local void @_Z18f_shared_za_callerv() #[[SHARED_ZA_DEF:[0-9]+]] { +// CHECK: entry: +// CHECK: call void @_ZW1A11f_shared_zav() #[[SHARED_ZA_USE:[0-9]+]] +// CHECK: call void @_ZW1A14f_preserves_zav() #[[PRESERVES_ZA_USE:[0-9]+]] +// CHECK: ret void +// CHECK: } +// +// CHECK:declare void @_ZW1A11f_shared_zav() #[[SHARED_ZA_DECL:[0-9]+]] +// +// CHECK:declare void @_ZW1A14f_preserves_zav() #[[PRESERVES_ZA_DECL:[0-9]+]] +// +// CHECK:; Function Attrs: mustprogress noinline nounwind optnone +// CHECK:define dso_local void @_Z21f_nonstreaming_callerv() #[[NORMAL_DEF:[0-9]+]] { +// CHECK:entry: +// CHECK: call void @_ZW1A11f_streamingv() #[[STREAMING_USE:[0-9]+]] +// CHECK: call void @_ZW1A22f_streaming_compatiblev() #[[STREAMING_COMPATIBLE_USE:[0-9]+]] +// CHECK: ret void +// CHECK:} +// +// CHECK:declare void @_ZW1A11f_streamingv() #[[STREAMING_DECL:[0-9]+]] +// +// CHECK:declare void @_ZW1A22f_streaming_compatiblev() #[[STREAMING_COMPATIBLE_DECL:[0-9]+]] +// +// CHECK-DAG: attributes #[[SHARED_ZA_DEF]] = {{{.*}} "aarch64_inout_za" {{.*}}} +// CHECK-DAG: attributes #[[SHARED_ZA_DECL]] = {{{.*}} "aarch64_inout_za" {{.*}}} +// CHECK-DAG: attributes #[[PRESERVES_ZA_DECL]] = {{{.*}} "aarch64_preserves_za" {{.*}}} +// CHECK-DAG: attributes #[[NORMAL_DEF]] = {{{.*}}} +// CHECK-DAG: attributes #[[STREAMING_DECL]] = {{{.*}} "aarch64_pstate_sm_enabled" {{.*}}} +// CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_DECL]] = {{{.*}} "aarch64_pstate_sm_compatible" {{.*}}} +// CHECK-DAG: attributes #[[SHARED_ZA_USE]] = { "aarch64_inout_za" } +// CHECK-DAG: attributes #[[PRESERVES_ZA_USE]] = { "aarch64_preserves_za" } +// CHECK-DAG: attributes #[[STREAMING_USE]] = { "aarch64_pstate_sm_enabled" } +// CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_USE]] = { "aarch64_pstate_sm_compatible" } + +void f_shared_za_caller(void) __arm_inout("za") { + f_shared_za(); + f_preserves_za(); +} + +void f_nonstreaming_caller(void) { + f_streaming(); + f_streaming_compatible(); +} diff --git a/clang/test/Parser/MicrosoftExtensions.cpp b/clang/test/Parser/MicrosoftExtensions.cpp index 01e59aa1945a863cdb14f26a1ab6e8fd8b8c87bc..6bf802a29ace398398e4a4de1a96e0c9aa386931 100644 --- a/clang/test/Parser/MicrosoftExtensions.cpp +++ b/clang/test/Parser/MicrosoftExtensions.cpp @@ -44,8 +44,8 @@ typedef struct _GUID unsigned char Data4[8]; } GUID; -struct __declspec(uuid(L"00000000-0000-0000-1234-000000000047")) uuid_attr_bad1 { };// expected-error {{'uuid' attribute requires a string}} -struct __declspec(uuid(3)) uuid_attr_bad2 { };// expected-error {{'uuid' attribute requires a string}} +struct __declspec(uuid(L"00000000-0000-0000-1234-000000000047")) uuid_attr_bad1 { };// expected-warning {{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}} +struct __declspec(uuid(3)) uuid_attr_bad2 { };// expected-error {{expected string literal as argument of 'uuid' attribute}} struct __declspec(uuid("0000000-0000-0000-1234-0000500000047")) uuid_attr_bad3 { };// expected-error {{uuid attribute contains a malformed GUID}} struct 
__declspec(uuid("0000000-0000-0000-Z234-000000000047")) uuid_attr_bad4 { };// expected-error {{uuid attribute contains a malformed GUID}} struct __declspec(uuid("000000000000-0000-1234-000000000047")) uuid_attr_bad5 { };// expected-error {{uuid attribute contains a malformed GUID}} diff --git a/clang/test/Parser/c2x-attribute-keywords.c b/clang/test/Parser/c2x-attribute-keywords.c index 757dc82860110a5f0f1e4e14e56d2b90ca33ac8a..b88d2b9c23e697ad449d887d6b2f430152e8c5ba 100644 --- a/clang/test/Parser/c2x-attribute-keywords.c +++ b/clang/test/Parser/c2x-attribute-keywords.c @@ -1,60 +1,64 @@ -// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %s -// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %s - -enum __arm_streaming E { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - One __arm_streaming, // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +// RUN: sed -e "s@ATTR_USE@__arm_streaming@g" -e "s@ATTR_NAME@__arm_streaming@g" %s > %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %t +// RUN: sed -e "s@ATTR_USE@__arm_inout\(\"za\"\)@g" -e "s@ATTR_NAME@__arm_inout@g" %s > %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %t + +enum ATTR_USE E { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + One ATTR_USE, // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} Two, - Three __arm_streaming // expected-error {{'__arm_streaming' cannot be applied to a declaration}} + Three ATTR_USE // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -enum __arm_streaming { Four }; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming enum E2 { Five }; // expected-error {{misplaced '__arm_streaming'}} +enum ATTR_USE { Four }; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +ATTR_USE enum E2 { Five }; // expected-error {{misplaced 'ATTR_NAME'}} // FIXME: this diagnostic can be improved. -enum { __arm_streaming Six }; // expected-error {{expected identifier}} +enum { ATTR_USE Six }; // expected-error {{expected identifier}} // FIXME: this diagnostic can be improved. 
-enum E3 __arm_streaming { Seven }; // expected-error {{expected identifier or '('}} - -struct __arm_streaming S1 { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - int i __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int __arm_streaming j; // expected-error {{'__arm_streaming' only applies to function types}} - int k[10] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int l __arm_streaming[10]; // expected-error {{'__arm_streaming' only applies to function types}} - __arm_streaming int m, n; // expected-error {{'__arm_streaming' only applies to function types}} - int o __arm_streaming : 12; // expected-error {{'__arm_streaming' only applies to function types}} - int __arm_streaming : 0; // expected-error {{'__arm_streaming' only applies to function types}} - int p, __arm_streaming : 0; // expected-error {{'__arm_streaming' cannot appear here}} - int q, __arm_streaming r; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming int; // expected-error {{'__arm_streaming' cannot appear here}} \ +enum E3 ATTR_USE { Seven }; // expected-error {{expected identifier or '('}} + +struct ATTR_USE S1 { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + int i ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int ATTR_USE j; // expected-error {{'ATTR_NAME' only applies to function types}} + int k[10] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int l ATTR_USE[10]; // expected-error {{'ATTR_NAME' only applies to function types}} + ATTR_USE int m, n; // expected-error {{'ATTR_NAME' only applies to function types}} + int o ATTR_USE : 12; // expected-error {{'ATTR_NAME' only applies to function types}} + int ATTR_USE : 0; // expected-error {{'ATTR_NAME' only applies to function types}} + int p, ATTR_USE : 0; // expected-error {{'ATTR_NAME' cannot appear here}} + int q, ATTR_USE r; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE int; // expected-error {{'ATTR_NAME' cannot appear here}} \ // expected-warning {{declaration does not declare anything}} }; -__arm_streaming struct S2 { int a; }; // expected-error {{misplaced '__arm_streaming'}} -struct S3 __arm_streaming { int a; }; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE struct S2 { int a; }; // expected-error {{misplaced 'ATTR_NAME'}} +struct S3 ATTR_USE { int a; }; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -union __arm_streaming U { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - double d __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types; type here is 'double'}} - __arm_streaming int i; // expected-error {{'__arm_streaming' only applies to function types; type here is 'int'}} +union ATTR_USE U { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + double d ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types; type here is 'double'}} + ATTR_USE int i; // expected-error {{'ATTR_NAME' only applies to function types; type here is 'int'}} }; -__arm_streaming union U2 { double d; }; // expected-error {{misplaced '__arm_streaming'}} -union U3 __arm_streaming { double d; }; // expected-error {{'__arm_streaming' cannot appear here}} \ - 
expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE union U2 { double d; }; // expected-error {{misplaced 'ATTR_NAME'}} +union U3 ATTR_USE { double d; }; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -struct __arm_streaming IncompleteStruct; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -union __arm_streaming IncompleteUnion; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming IncompleteEnum; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +struct ATTR_USE IncompleteStruct; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +union ATTR_USE IncompleteUnion; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE IncompleteEnum; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -__arm_streaming void f1(void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -void __arm_streaming f2(void); // expected-error {{'__arm_streaming' only applies to function types}} -void f3 __arm_streaming (void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -void f4(void) __arm_streaming; +ATTR_USE void f1(void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +void ATTR_USE f2(void); // expected-error {{'ATTR_NAME' only applies to function types}} +void f3 ATTR_USE (void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +void f4(void) ATTR_USE; -void f5(int i __arm_streaming, __arm_streaming int j, int __arm_streaming k); // expected-error 3 {{'__arm_streaming' only applies to function types}} +void f5(int i ATTR_USE, ATTR_USE int j, int ATTR_USE k); // expected-error 3 {{'ATTR_NAME' only applies to function types}} -void f6(a, b) __arm_streaming int a; int b; { // expected-error {{'__arm_streaming' cannot appear here}} \ +void f6(a, b) ATTR_USE int a; int b; { // expected-error {{'ATTR_NAME' cannot appear here}} \ c2x-warning {{deprecated}} } @@ -63,57 +67,74 @@ void f6(a, b) __arm_streaming int a; int b; { // expected-error {{'__arm_streami // behavior given that we *don't* want to parse it as part of the K&R parameter // declarations. It is disallowed to avoid a parsing ambiguity we already // handle well. 
-int (*f7(a, b))(int, int) __arm_streaming int a; int b; { // c2x-warning {{deprecated}} +int (*f7(a, b))(int, int) ATTR_USE int a; int b; { // c2x-warning {{deprecated}} return 0; } -__arm_streaming int a, b; // expected-error {{'__arm_streaming' only applies to function types}} -int c __arm_streaming, d __arm_streaming; // expected-error 2 {{'__arm_streaming' only applies to function types}} +ATTR_USE int a, b; // expected-error {{'ATTR_NAME' only applies to function types}} +int c ATTR_USE, d ATTR_USE; // expected-error 2 {{'ATTR_NAME' only applies to function types}} -void f8(void) __arm_streaming { - __arm_streaming int i, j; // expected-error {{'__arm_streaming' only applies to function types}} - int k, l __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} +void f8(void) ATTR_USE { + ATTR_USE int i, j; // expected-error {{'ATTR_NAME' only applies to function types}} + int k, l ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} } -__arm_streaming void f9(void) { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - int i[10] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int (*fp1)(void)__arm_streaming; - int (*fp2 __arm_streaming)(void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE void f9(void) { // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + int i[10] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int (*fp1)(void)ATTR_USE; + int (*fp2 ATTR_USE)(void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} - int * __arm_streaming *ipp; // expected-error {{'__arm_streaming' only applies to function types}} + int * ATTR_USE *ipp; // expected-error {{'ATTR_NAME' only applies to function types}} } -void f10(int j[static 10] __arm_streaming, int k[*] __arm_streaming); // expected-error 2 {{'__arm_streaming' only applies to function types}} +void f10(int j[static 10] ATTR_USE, int k[*] ATTR_USE); // expected-error 2 {{'ATTR_NAME' only applies to function types}} void f11(void) { - __arm_streaming {} // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming if (1) {} // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE {} // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE if (1) {} // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming switch (1) { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming case 1: __arm_streaming break; // expected-error 2 {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming default: break; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE switch (1) { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE case 1: ATTR_USE break; // expected-error 2 {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE default: break; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } goto foo; - __arm_streaming foo: (void)1; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} + ATTR_USE foo: (void)1; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} - __arm_streaming for (;;); // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming while (1); // expected-error {{'__arm_streaming' cannot be applied to a 
statement}} - __arm_streaming do __arm_streaming { } while(1); // expected-error 2 {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE for (;;); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE while (1); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE do ATTR_USE { } while(1); // expected-error 2 {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming (void)1; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE (void)1; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - (void)sizeof(int [4]__arm_streaming); // expected-error {{'__arm_streaming' only applies to function types}} - (void)sizeof(struct __arm_streaming S3 { int a __arm_streaming; }); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} \ - // expected-error {{'__arm_streaming' only applies to function types; type here is 'int'}} + (void)sizeof(int [4]ATTR_USE); // expected-error {{'ATTR_NAME' only applies to function types}} + (void)sizeof(struct ATTR_USE S3 { int a ATTR_USE; }); // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} \ + // expected-error {{'ATTR_NAME' only applies to function types; type here is 'int'}} - __arm_streaming return; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE return; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming asm (""); // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE asm (""); // expected-error {{'ATTR_NAME' cannot appear here}} } -struct __arm_streaming S4 *s; // expected-error {{'__arm_streaming' cannot appear here}} +struct ATTR_USE S4 *s; // expected-error {{'ATTR_NAME' cannot appear here}} struct S5 {}; -int c = sizeof(struct __arm_streaming S5); // expected-error {{'__arm_streaming' cannot appear here}} +int c = sizeof(struct ATTR_USE S5); // expected-error {{'ATTR_NAME' cannot appear here}} + +void invalid_parentheses1() __arm_inout; // expected-error {{expected '(' after ''__arm_inout''}} +void invalid_parentheses2() __arm_inout(; // expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses3() __arm_inout((); // expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses4() __arm_inout); // expected-error {{expected '(' after ''__arm_inout''}} \ + // expected-error {{expected function body after function declarator}} +void invalid_parentheses5() __arm_inout(()); // expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses6() __arm_inout("za"; // expected-error {{expected ')'}} +void invalid_parentheses7() __arm_streaming(; // expected-error {{expected parameter declarator}} \ + // expected-error {{expected ')'}} \ + // expected-note {{to match this '('}} \ + // expected-error {{function cannot return function type 'void ()'}} \ + // expected-error {{'__arm_streaming' only applies to function types; type here is 'int ()'}} \ + // expected-warning {{'__arm_streaming' only applies to non-K&R-style functions}} +void invalid_parentheses8() __arm_streaming(); // expected-error {{function cannot return function type 'void ()'}} \ + // expected-error {{'__arm_streaming' only applies to function types; type here 
is 'int ()'}} \ + // expected-warning {{'__arm_streaming' only applies to non-K&R-style functions}} diff --git a/clang/test/Parser/c2x-attribute-keywords.m b/clang/test/Parser/c2x-attribute-keywords.m index d1c45da34fbc60df330034fc738fbbd20de09002..575c88ffffc3d6699bd11d9ff36286a21fd5c843 100644 --- a/clang/test/Parser/c2x-attribute-keywords.m +++ b/clang/test/Parser/c2x-attribute-keywords.m @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify %s -enum __arm_streaming E1 : int; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +enum __arm_inout("za") E1 : int; // expected-error {{'__arm_inout' only applies to non-K&R-style functions}} @interface Base @end @@ -15,5 +15,5 @@ enum __arm_streaming E1 : int; // expected-error {{'__arm_streaming' cannot be a void f(T *t) { - __arm_streaming[[t foo] bar]; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + __arm_inout("za")[[t foo] bar]; // expected-error {{'__arm_inout' cannot be applied to a statement}} } diff --git a/clang/test/Parser/c2x-attributes.c b/clang/test/Parser/c2x-attributes.c index 3e9469f8fa8b5fa1013ce9a54a16683aa159e982..be039e40f98ef19ac98326e3c5352f2c4fb85fcd 100644 --- a/clang/test/Parser/c2x-attributes.c +++ b/clang/test/Parser/c2x-attributes.c @@ -16,7 +16,7 @@ enum { [[]] Six }; // expected-error {{expected identifier}} // FIXME: this diagnostic can be improved. enum E3 [[]] { Seven }; // expected-error {{expected identifier or '('}} -[[deprecated([""])]] int WrongArgs; // expected-error {{expected expression}} +[[deprecated([""])]] int WrongArgs; // expected-error {{expected string literal as argument of 'deprecated' attribute}} [[,,,,,]] int Commas1; // ok [[,, maybe_unused]] int Commas2; // ok [[maybe_unused,,,]] int Commas3; // ok @@ -24,6 +24,13 @@ enum E3 [[]] { Seven }; // expected-error {{expected identifier or '('}} [[foo bar]] int NoComma; // expected-error {{expected ','}} \ // expected-warning {{unknown attribute 'foo' ignored}} + +[[deprecated(L"abc")]] void unevaluated_string(void); +// expected-warning@-1 {{encoding prefix 'L' on an unevaluated string literal has no effect}} + +[[nodiscard("\123")]] int unevaluated_string2(void); +// expected-error@-1 {{invalid escape sequence '\123' in an unevaluated string literal}} + struct [[]] S1 { int i [[]]; int [[]] j; diff --git a/clang/test/Parser/cxx-attributes.cpp b/clang/test/Parser/cxx-attributes.cpp index b46326c4d474f0d74cac87a5b98efd05f4d3272a..51d1f9c8228121886e2e2db65f1755689484174c 100644 --- a/clang/test/Parser/cxx-attributes.cpp +++ b/clang/test/Parser/cxx-attributes.cpp @@ -41,7 +41,7 @@ void fn() { pi = &i[0]; } -[[deprecated([""])]] int WrongArgs; // expected-error {{expected variable name or 'this' in lambda capture list}} +[[deprecated([""])]] int WrongArgs; // expected-error {{expected string literal as argument of 'deprecated' attribute}} [[,,,,,]] int Commas1; // ok [[,, maybe_unused]] int Commas2; // ok [[maybe_unused,,,]] int Commas3; // ok diff --git a/clang/test/Parser/cxx0x-attributes.cpp b/clang/test/Parser/cxx0x-attributes.cpp index 3cb4e180e5f5f1f834ba2fe52338ec0c9c7bc285..10c5bbcac10227be31de09ecdb1fc94344c940b6 100644 --- a/clang/test/Parser/cxx0x-attributes.cpp +++ b/clang/test/Parser/cxx0x-attributes.cpp @@ -94,7 +94,7 @@ class [[]] [[]] final_class_another [[]] [[]] alignas(16) [[]]{}; // expected-error {{an attribute list cannot appear here}} // The diagnostics here don't matter much, this just shouldn't crash: -class C final 
[[deprecated(l]] {}); // expected-error {{use of undeclared identifier}} expected-error {{expected ']'}} expected-error {{an attribute list cannot appear here}} expected-error {{expected unqualified-id}} +class C final [[deprecated(l]] {}); //expected-error {{expected string literal as argument of 'deprecated' attribute}} expected-error {{an attribute list cannot appear here}} expected-error {{expected unqualified-id}} class D final alignas ([l) {}]{}); // expected-error {{expected ',' or ']' in lambda capture list}} expected-error {{an attribute list cannot appear here}} [[]] struct with_init_declarators {} init_declarator; @@ -266,7 +266,7 @@ template void variadic() { template void variadic_nttp() { void bar [[noreturn...]] (); // expected-error {{attribute 'noreturn' cannot be used as an attribute pack}} - void baz [[clang::no_sanitize(Is...)]] (); // expected-error {{attribute 'no_sanitize' does not support argument pack expansion}} + void baz [[clang::no_sanitize(Is...)]] (); // expected-error {{expected string literal as argument of 'no_sanitize' attribute}} void bor [[clang::annotate("A", "V" ...)]] (); // expected-error {{pack expansion does not contain any unexpanded parameter packs}} void bir [[clang::annotate("B", {1, 2, 3, 4})]] (); // expected-error {{'annotate' attribute requires parameter 1 to be a constant expression}} expected-note {{subexpression not valid in a constant expression}} void boo [[unknown::foo(Is...)]] (); // expected-warning {{unknown attribute 'foo' ignored}} @@ -445,3 +445,9 @@ class Ordering { ) { } }; + +namespace P2361 { +[[deprecated(L"abc")]] void a(); // expected-warning{{encoding prefix 'L' on an unevaluated string literal has no effect and is incompatible with c++2c}} \ + // expected-warning {{use of the 'deprecated' attribute is a C++14 extension}} +[[nodiscard("\123")]] int b(); // expected-error{{invalid escape sequence '\123' in an unevaluated string literal}} +} diff --git a/clang/test/Parser/cxx0x-keyword-attributes.cpp b/clang/test/Parser/cxx0x-keyword-attributes.cpp index 256a834e9e5460602814a4d01b009371e17c47b6..be7423cc7ecee367fdafc8051e8ba79ebb8e54bc 100644 --- a/clang/test/Parser/cxx0x-keyword-attributes.cpp +++ b/clang/test/Parser/cxx0x-keyword-attributes.cpp @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu %s +// RUN: sed -e "s@ATTR_USE@__arm_streaming@g" -e "s@ATTR_NAME@__arm_streaming@g" %s > %t +// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu -target-feature +sme -x c++ %t +// RUN: sed -e "s@ATTR_USE@__arm_inout\(\"za\"\)@g" -e "s@ATTR_NAME@__arm_inout@g" %s > %t +// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu -target-feature +sme -x c++ %t // Need std::initializer_list namespace std { @@ -35,136 +38,136 @@ namespace std { // Declaration syntax checks -__arm_streaming int before_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int __arm_streaming between_attr; // expected-error {{'__arm_streaming' only applies to function types}} -const __arm_streaming int between_attr_2 = 0; // expected-error {{'__arm_streaming' cannot appear here}} -int after_attr __arm_streaming; // expected-error {{'__arm_streaming' 
only applies to function types}} -int * __arm_streaming ptr_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int & __arm_streaming ref_attr = after_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int && __arm_streaming rref_attr = 0; // expected-error {{'__arm_streaming' only applies to function types}} -int array_attr [1] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} -void fn_attr () __arm_streaming; -void noexcept_fn_attr () noexcept __arm_streaming; +ATTR_USE int before_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int ATTR_USE between_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +const ATTR_USE int between_attr_2 = 0; // expected-error {{'ATTR_NAME' cannot appear here}} +int after_attr ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} +int * ATTR_USE ptr_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int & ATTR_USE ref_attr = after_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int && ATTR_USE rref_attr = 0; // expected-error {{'ATTR_NAME' only applies to function types}} +int array_attr [1] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} +void fn_attr () ATTR_USE; +void noexcept_fn_attr () noexcept ATTR_USE; struct MemberFnOrder { - virtual void f() const volatile && noexcept __arm_streaming final = 0; + virtual void f() const volatile && noexcept ATTR_USE final = 0; }; -struct __arm_streaming struct_attr; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -class __arm_streaming class_attr {}; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -union __arm_streaming union_attr; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming E { }; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +struct ATTR_USE struct_attr; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +class ATTR_USE class_attr {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +union ATTR_USE union_attr; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E { }; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} namespace test_misplacement { -__arm_streaming struct struct_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming class class_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming union union_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming enum E2 { }; // expected-error {{misplaced '__arm_streaming'}} +ATTR_USE struct struct_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE class class_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE union union_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE enum E2 { }; // expected-error {{misplaced 'ATTR_NAME'}} } // Checks attributes placed at wrong syntactic locations of class specifiers. 
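A note on why every class-specifier placement below is rejected (an illustrative sketch, not part of the patch; the names ok, ok_ptr and S are invented for the example): the SME keyword attributes appertain to the function type, so the only well-formed positions are trailing a function declarator or a function pointer's parameter list, assuming the file is compiled with -target-feature +sme as in the RUN lines above:

    void ok(void) __arm_streaming;               // OK: the keyword trails the function declarator
    void (*ok_ptr)(void) __arm_streaming = &ok;  // OK: the keyword is part of the pointee function type
    // struct __arm_streaming S {};              // error: a class is not a function type

With that rule in mind, the class-specifier diagnostics that follow are the same appertainment failure reported at different syntactic positions.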
-class __arm_streaming __arm_streaming // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - attr_after_class_name_decl __arm_streaming __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} +class ATTR_USE ATTR_USE // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + attr_after_class_name_decl ATTR_USE ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} -class __arm_streaming __arm_streaming // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - attr_after_class_name_definition __arm_streaming __arm_streaming __arm_streaming{}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 3 {{'__arm_streaming' cannot be applied to a declaration}} +class ATTR_USE ATTR_USE // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + attr_after_class_name_definition ATTR_USE ATTR_USE ATTR_USE{}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 3 {{'ATTR_NAME' only applies to non-K&R-style functions}} -class __arm_streaming c {}; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -class c __arm_streaming __arm_streaming x; // expected-error 2 {{'__arm_streaming' only applies to function types}} -class c __arm_streaming __arm_streaming y __arm_streaming __arm_streaming; // expected-error 4 {{'__arm_streaming' only applies to function types}} +class ATTR_USE c {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +class c ATTR_USE ATTR_USE x; // expected-error 2 {{'ATTR_NAME' only applies to function types}} +class c ATTR_USE ATTR_USE y ATTR_USE ATTR_USE; // expected-error 4 {{'ATTR_NAME' only applies to function types}} class c final [(int){0}]; class base {}; -class __arm_streaming __arm_streaming final_class // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming alignas(float) final // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming alignas(float) __arm_streaming alignas(float): base{}; // expected-error {{'__arm_streaming' cannot appear here}} +class ATTR_USE ATTR_USE final_class // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE alignas(float) final // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE alignas(float) ATTR_USE alignas(float): base{}; // expected-error {{'ATTR_NAME' cannot appear here}} -class __arm_streaming __arm_streaming final_class_another // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming __arm_streaming alignas(16) final // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming __arm_streaming alignas(16) __arm_streaming{}; // expected-error {{'__arm_streaming' cannot appear here}} +class ATTR_USE ATTR_USE final_class_another // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE ATTR_USE alignas(16) final // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE ATTR_USE alignas(16) ATTR_USE{}; // 
expected-error {{'ATTR_NAME' cannot appear here}} -class after_class_close {} __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here, place it after "class" to apply it to the type declaration}} +class after_class_close {} ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here, place it after "class" to apply it to the type declaration}} class C {}; -__arm_streaming struct with_init_declarators {} init_declarator; // expected-error {{'__arm_streaming' only applies to function types}} -__arm_streaming struct no_init_declarators; // expected-error {{misplaced '__arm_streaming'}} -template __arm_streaming struct no_init_declarators_template; // expected-error {{'__arm_streaming' cannot appear here}} +ATTR_USE struct with_init_declarators {} init_declarator; // expected-error {{'ATTR_NAME' only applies to function types}} +ATTR_USE struct no_init_declarators; // expected-error {{misplaced 'ATTR_NAME'}} +template ATTR_USE struct no_init_declarators_template; // expected-error {{'ATTR_NAME' cannot appear here}} void fn_with_structs() { - __arm_streaming struct with_init_declarators {} init_declarator; // expected-error {{'__arm_streaming' only applies to function types}} - __arm_streaming struct no_init_declarators; // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE struct with_init_declarators {} init_declarator; // expected-error {{'ATTR_NAME' only applies to function types}} + ATTR_USE struct no_init_declarators; // expected-error {{'ATTR_NAME' cannot appear here}} } -__arm_streaming; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} struct ctordtor { - __arm_streaming ctordtor __arm_streaming () __arm_streaming; // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - ctordtor (C) __arm_streaming; - __arm_streaming ~ctordtor __arm_streaming () __arm_streaming; // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} + ATTR_USE ctordtor ATTR_USE () ATTR_USE; // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} + ctordtor (C) ATTR_USE; + ATTR_USE ~ctordtor ATTR_USE () ATTR_USE; // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} }; -__arm_streaming ctordtor::ctordtor __arm_streaming () __arm_streaming {} // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming ctordtor::ctordtor (C) __arm_streaming try {} catch (...) 
{} // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming ctordtor::~ctordtor __arm_streaming () __arm_streaming {} // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} -extern "C++" __arm_streaming int extern_attr; // expected-error {{'__arm_streaming' only applies to function types}} -template __arm_streaming void template_attr (); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming __arm_streaming int __arm_streaming __arm_streaming multi_attr __arm_streaming __arm_streaming; // expected-error 6 {{'__arm_streaming' only applies to function types}} - -int (paren_attr) __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here}} -unsigned __arm_streaming int attr_in_decl_spec; // expected-error {{'__arm_streaming' cannot appear here}} -unsigned __arm_streaming int __arm_streaming const double_decl_spec = 0; // expected-error 2 {{'__arm_streaming' cannot appear here}} +ATTR_USE ctordtor::ctordtor ATTR_USE () ATTR_USE {} // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ctordtor::ctordtor (C) ATTR_USE try {} catch (...) {} // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ctordtor::~ctordtor ATTR_USE () ATTR_USE {} // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} +extern "C++" ATTR_USE int extern_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +template ATTR_USE void template_attr (); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ATTR_USE int ATTR_USE ATTR_USE multi_attr ATTR_USE ATTR_USE; // expected-error 6 {{'ATTR_NAME' only applies to function types}} + +int (paren_attr) ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here}} +unsigned ATTR_USE int attr_in_decl_spec; // expected-error {{'ATTR_NAME' cannot appear here}} +unsigned ATTR_USE int ATTR_USE const double_decl_spec = 0; // expected-error 2 {{'ATTR_NAME' cannot appear here}} class foo { - void const_after_attr () __arm_streaming const; // expected-error {{expected ';'}} + void const_after_attr () ATTR_USE const; // expected-error {{expected ';'}} }; -extern "C++" __arm_streaming { } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming extern "C++" { } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming template void before_template_attr (); // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming namespace ns { int i; } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming static_assert(true, ""); //expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming asm(""); // expected-error {{'__arm_streaming' cannot appear here}} - -__arm_streaming using ns::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming using namespace ns; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -namespace __arm_streaming ns2 {} // expected-warning {{attributes on a namespace declaration are a C++17 extension}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - -using __arm_streaming alignas(4)__arm_streaming ns::i; // expected-warning 2 {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot appear here}} \ +extern "C++" ATTR_USE { } // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE extern "C++" { } // expected-error {{'ATTR_NAME' cannot appear here}} 
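Two distinct diagnostics recur throughout this hunk, and it helps to keep them apart (sketch only, not part of the patch; the declarations are invented for the example): 'cannot appear here' is a parse-position error, meaning no attribute of any kind may be written there, while the 'only applies to ...' forms are semantic errors for a syntactically legal position attached to the wrong entity:

    __arm_streaming namespace ns {}  // parse error: the keyword cannot appear here at all
    int i __arm_streaming;           // parses, but is rejected: 'i' is not a function type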
+ATTR_USE template void before_template_attr (); // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE namespace ns { int i; } // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE static_assert(true, ""); //expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE asm(""); // expected-error {{'ATTR_NAME' cannot appear here}} + +ATTR_USE using ns::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +ATTR_USE using namespace ns; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +namespace ATTR_USE ns2 {} // expected-warning {{attributes on a namespace declaration are a C++17 extension}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + +using ATTR_USE alignas(4)ATTR_USE ns::i; // expected-warning 2 {{ISO C++}} \ + expected-error {{'ATTR_NAME' cannot appear here}} \ expected-error {{'alignas' attribute only applies to variables, data members and tag types}} \ expected-warning {{ISO C++}} \ - expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} -using __arm_streaming alignas(4) __arm_streaming foobar = int; // expected-error {{'__arm_streaming' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} +using ATTR_USE alignas(4) ATTR_USE foobar = int; // expected-error {{'ATTR_NAME' cannot appear here}} \ expected-error {{'alignas' attribute only applies to}} \ - expected-error 2 {{'__arm_streaming' only applies to function types}} - -__arm_streaming using T = int; // expected-error {{'__arm_streaming' cannot appear here}} -using T __arm_streaming = int; // expected-error {{'__arm_streaming' only applies to function types}} -template using U __arm_streaming = T; // expected-error {{'__arm_streaming' only applies to function types}} -using ns::i __arm_streaming; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} -using ns::i __arm_streaming, ns::i __arm_streaming; // expected-warning 2 {{ISO C++}} \ + expected-error 2 {{'ATTR_NAME' only applies to function types}} + +ATTR_USE using T = int; // expected-error {{'ATTR_NAME' cannot appear here}} +using T ATTR_USE = int; // expected-error {{'ATTR_NAME' only applies to function types}} +template using U ATTR_USE = T; // expected-error {{'ATTR_NAME' only applies to function types}} +using ns::i ATTR_USE; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +using ns::i ATTR_USE, ns::i ATTR_USE; // expected-warning 2 {{ISO C++}} \ expected-warning {{use of multiple declarators in a single using declaration is a C++17 extension}} \ - expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} struct using_in_struct_base { typedef int i, j, k, l; }; struct using_in_struct : using_in_struct_base { - __arm_streaming using using_in_struct_base::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - using using_in_struct_base::j __arm_streaming; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming using using_in_struct_base::k __arm_streaming, using_in_struct_base::l __arm_streaming; // expected-warning 3 {{ISO C++}} \ + ATTR_USE using using_in_struct_base::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to 
non-K&R-style functions}} + using using_in_struct_base::j ATTR_USE; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE using using_in_struct_base::k ATTR_USE, using_in_struct_base::l ATTR_USE; // expected-warning 3 {{ISO C++}} \ expected-warning {{use of multiple declarators in a single using declaration is a C++17 extension}} \ - expected-error 4 {{'__arm_streaming' cannot be applied to a declaration}} + expected-error 4 {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -using __arm_streaming ns::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} -using T __arm_streaming = int; // expected-error {{'__arm_streaming' only applies to function types}} +using ATTR_USE ns::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +using T ATTR_USE = int; // expected-error {{'ATTR_NAME' only applies to function types}} -auto trailing() -> __arm_streaming const int; // expected-error {{'__arm_streaming' cannot appear here}} -auto trailing() -> const __arm_streaming int; // expected-error {{'__arm_streaming' cannot appear here}} -auto trailing() -> const int __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} -auto trailing_2() -> struct struct_attr __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} +auto trailing() -> ATTR_USE const int; // expected-error {{'ATTR_NAME' cannot appear here}} +auto trailing() -> const ATTR_USE int; // expected-error {{'ATTR_NAME' cannot appear here}} +auto trailing() -> const int ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} +auto trailing_2() -> struct struct_attr ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} namespace N { struct S {}; @@ -172,88 +175,88 @@ namespace N { template struct Template {}; // FIXME: Improve this diagnostic -struct __arm_streaming N::S s; // expected-error {{'__arm_streaming' cannot appear here}} -struct __arm_streaming Template t; // expected-error {{'__arm_streaming' cannot appear here}} -struct __arm_streaming ::template Template u; // expected-error {{'__arm_streaming' cannot appear here}} -template struct __arm_streaming Template; // expected-error {{'__arm_streaming' cannot appear here}} +struct ATTR_USE N::S s; // expected-error {{'ATTR_NAME' cannot appear here}} +struct ATTR_USE Template t; // expected-error {{'ATTR_NAME' cannot appear here}} +struct ATTR_USE ::template Template u; // expected-error {{'ATTR_NAME' cannot appear here}} +template struct ATTR_USE Template; // expected-error {{'ATTR_NAME' cannot appear here}} template struct __attribute__((pure)) Template; // We still allow GNU-style attributes here -template <> struct __arm_streaming Template; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - -enum __arm_streaming E1 {}; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming E2; // expected-error {{forbids forward references}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming E1; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming E3 : int; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum __arm_streaming 
{ // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - k_123 __arm_streaming = 123 // expected-warning {{attributes on an enumerator declaration are a C++17 extension}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} +template <> struct ATTR_USE Template; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + +enum ATTR_USE E1 {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E2; // expected-error {{forbids forward references}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E1; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E3 : int; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + k_123 ATTR_USE = 123 // expected-warning {{attributes on an enumerator declaration are a C++17 extension}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -enum __arm_streaming E1 e; // expected-error {{'__arm_streaming' cannot appear here}} -enum __arm_streaming class E4 { }; // expected-error {{'__arm_streaming' cannot appear here}} -enum struct __arm_streaming E5; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -enum E6 {} __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here, place it after "enum" to apply it to the type declaration}} +enum ATTR_USE E1 e; // expected-error {{'ATTR_NAME' cannot appear here}} +enum ATTR_USE class E4 { }; // expected-error {{'ATTR_NAME' cannot appear here}} +enum struct ATTR_USE E5; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum E6 {} ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here, place it after "enum" to apply it to the type declaration}} struct S { - friend int f __arm_streaming (); // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - friend int f2 __arm_streaming () {} // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming friend int g(); // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming friend int h() { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} + friend int f ATTR_USE (); // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + friend int f2 ATTR_USE () {} // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + ATTR_USE friend int g(); // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend int h() { // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} } - __arm_streaming friend int f3(), f4(), f5(); // expected-error {{'__arm_streaming' cannot appear here}} - friend int f6 __arm_streaming (), f7 __arm_streaming (), f8 __arm_streaming (); // expected-error3 {{'__arm_streaming' cannot appear here}} \ - expected-error 3 {{'__arm_streaming' cannot be applied to a declaration}} - friend class __arm_streaming C; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming friend class D; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming friend int; // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE friend int f3(), f4(), f5(); // expected-error {{'ATTR_NAME' cannot appear here}} + friend int 
f6 ATTR_USE (), f7 ATTR_USE (), f8 ATTR_USE (); // expected-error3 {{'ATTR_NAME' cannot appear here}} \ + expected-error 3 {{'ATTR_NAME' cannot be applied to a declaration}} + friend class ATTR_USE C; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend class D; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend int; // expected-error {{'ATTR_NAME' cannot appear here}} }; template void tmpl (T) {} -template __arm_streaming void tmpl(char); // expected-error {{'__arm_streaming' cannot appear here}} -template void __arm_streaming tmpl(short); // expected-error {{'__arm_streaming' only applies to function types}} +template ATTR_USE void tmpl(char); // expected-error {{'ATTR_NAME' cannot appear here}} +template void ATTR_USE tmpl(short); // expected-error {{'ATTR_NAME' only applies to function types}} // Statement tests void foo () { - __arm_streaming ; // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming { } // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming if (0) { } // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming for (;;); // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming do { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming continue; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE ; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE { } // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE if (0) { } // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE for (;;); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE do { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE continue; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } while (0); - __arm_streaming while (0); // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE while (0); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming switch (i) { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming case 0: // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming default: // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming break; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE switch (i) { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE case 0: // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE default: // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE break; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } - __arm_streaming goto there; // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming there: // expected-error {{'__arm_streaming' cannot be applied to a declaration}} + ATTR_USE goto there; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE there: // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} - __arm_streaming try { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - } __arm_streaming catch (...) 
{ // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE try { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + } ATTR_USE catch (...) { // expected-error {{'ATTR_NAME' cannot appear here}} } - void bar __arm_streaming (__arm_streaming int i, __arm_streaming int j); // expected-error 2 {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - using FuncType = void (__arm_streaming int); // expected-error {{'__arm_streaming' only applies to function types}} - void baz(__arm_streaming...); // expected-error {{expected parameter declarator}} + void bar ATTR_USE (ATTR_USE int i, ATTR_USE int j); // expected-error 2 {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + using FuncType = void (ATTR_USE int); // expected-error {{'ATTR_NAME' only applies to function types}} + void baz(ATTR_USE...); // expected-error {{expected parameter declarator}} - __arm_streaming return; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE return; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } // Expression tests void bar () { - new int[42]__arm_streaming[5]__arm_streaming{}; // expected-error {{'__arm_streaming' only applies to function types}} + new int[42]ATTR_USE[5]ATTR_USE{}; // expected-error {{'ATTR_NAME' only applies to function types}} } // Condition tests void baz () { - if (__arm_streaming bool b = true) { // expected-error {{'__arm_streaming' only applies to function types}} - switch (__arm_streaming int n { 42 }) { // expected-error {{'__arm_streaming' only applies to function types}} + if (ATTR_USE bool b = true) { // expected-error {{'ATTR_NAME' only applies to function types}} + switch (ATTR_USE int n { 42 }) { // expected-error {{'ATTR_NAME' only applies to function types}} default: - for (__arm_streaming int n = 0; __arm_streaming char b = n < 5; ++b) { // expected-error 2 {{'__arm_streaming' only applies to function types}} + for (ATTR_USE int n = 0; ATTR_USE char b = n < 5; ++b) { // expected-error 2 {{'ATTR_NAME' only applies to function types}} } } } @@ -261,37 +264,37 @@ void baz () { // An attribute can be applied to an expression-statement, such as the first // statement in a for. But it can't be applied to a condition which is an // expression. 
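As a minimal illustration of the comment above, using the standard [[ ]] syntax from which this keyword test was derived (not part of the patch; attr_positions and x are invented names): an attribute-specifier may precede the init-statement of a for, because that position is a statement, but not the condition, which is an expression. The keyword attributes in the lines that follow are additionally rejected even in the init-statement position, since they are not statement attributes at all:

    void attr_positions(int x) {
      for ([[]] x = 0; ; ) { break; }  // OK: the init-statement is a statement
      // for (; [[]] x < 5; ) {}       // error: an attribute list cannot appear here
    }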
- for (__arm_streaming x = 0; ; ) {} // expected-error {{'__arm_streaming' cannot appear here}} - for (; __arm_streaming x < 5; ) {} // expected-error {{'__arm_streaming' cannot appear here}} - while (__arm_streaming bool k { false }) { // expected-error {{'__arm_streaming' only applies to function types}} + for (ATTR_USE x = 0; ; ) {} // expected-error {{'ATTR_NAME' cannot appear here}} + for (; ATTR_USE x < 5; ) {} // expected-error {{'ATTR_NAME' cannot appear here}} + while (ATTR_USE bool k { false }) { // expected-error {{'ATTR_NAME' only applies to function types}} } - while (__arm_streaming true) { // expected-error {{'__arm_streaming' cannot appear here}} + while (ATTR_USE true) { // expected-error {{'ATTR_NAME' cannot appear here}} } do { - } while (__arm_streaming false); // expected-error {{'__arm_streaming' cannot appear here}} + } while (ATTR_USE false); // expected-error {{'ATTR_NAME' cannot appear here}} - for (__arm_streaming int n : { 1, 2, 3 }) { // expected-error {{'__arm_streaming' only applies to function types}} + for (ATTR_USE int n : { 1, 2, 3 }) { // expected-error {{'ATTR_NAME' only applies to function types}} } } enum class __attribute__((visibility("hidden"))) SecretKeepers { one, /* rest are deprecated */ two, three }; -enum class __arm_streaming EvenMoreSecrets {}; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +enum class ATTR_USE EvenMoreSecrets {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} // Forbid attributes on decl specifiers. -unsigned __arm_streaming static int __arm_streaming v1; // expected-error {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot appear here}} -typedef __arm_streaming unsigned long __arm_streaming v2; // expected-error {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot appear here}} -int __arm_streaming foo(int __arm_streaming x); // expected-error 2 {{'__arm_streaming' only applies to function types}} +unsigned ATTR_USE static int ATTR_USE v1; // expected-error {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot appear here}} +typedef ATTR_USE unsigned long ATTR_USE v2; // expected-error {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot appear here}} +int ATTR_USE foo(int ATTR_USE x); // expected-error 2 {{'ATTR_NAME' only applies to function types}} -__arm_streaming; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} class A { - A(__arm_streaming int a); // expected-error {{'__arm_streaming' only applies to function types}} + A(ATTR_USE int a); // expected-error {{'ATTR_NAME' only applies to function types}} }; -A::A(__arm_streaming int a) {} // expected-error {{'__arm_streaming' only applies to function types}} +A::A(ATTR_USE int a) {} // expected-error {{'ATTR_NAME' only applies to function types}} template struct TemplateStruct {}; class FriendClassesWithAttributes { @@ -299,47 +302,47 @@ class FriendClassesWithAttributes { template friend class __attribute__((__type_visibility__("default"))) vector; template friend class __declspec(code_seg("foo,whatever")) vector2; // But not C++11 ones - template friend class __arm_streaming vector3; // expected-error {{'__arm_streaming' cannot appear here}} + template friend class ATTR_USE vector3; // expected-error {{'ATTR_NAME' cannot appear here}} // Also 
allowed friend struct __attribute__((__type_visibility__("default"))) TemplateStruct; friend struct __declspec(code_seg("foo,whatever")) TemplateStruct; - friend struct __arm_streaming TemplateStruct; // expected-error {{'__arm_streaming' cannot appear here}} + friend struct ATTR_USE TemplateStruct; // expected-error {{'ATTR_NAME' cannot appear here}} }; // Check ordering: C++11 attributes must appear before GNU attributes. class Ordering { void f1( - int (__arm_streaming __attribute__(()) int n) // expected-error {{'__arm_streaming' only applies to function types}} + int (ATTR_USE __attribute__(()) int n) // expected-error {{'ATTR_NAME' only applies to function types}} ) { } void f2( - int (*)(__arm_streaming __attribute__(()) int n) // expected-error {{'__arm_streaming' only applies to function types}} + int (*)(ATTR_USE __attribute__(()) int n) // expected-error {{'ATTR_NAME' only applies to function types}} ) { } void f3( - int (__attribute__(()) __arm_streaming int n) // expected-error {{'__arm_streaming' cannot appear here}} + int (__attribute__(()) ATTR_USE int n) // expected-error {{'ATTR_NAME' cannot appear here}} ) { } void f4( - int (*)(__attribute__(()) __arm_streaming int n) // expected-error {{'__arm_streaming' cannot appear here}} + int (*)(__attribute__(()) ATTR_USE int n) // expected-error {{'ATTR_NAME' cannot appear here}} ) { } }; namespace base_specs { struct A {}; -struct B : __arm_streaming A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct C : __arm_streaming virtual A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct D : __arm_streaming public virtual A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct E : public __arm_streaming virtual A {}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct F : virtual __arm_streaming public A {}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a base specifier}} +struct B : ATTR_USE A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct C : ATTR_USE virtual A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct D : ATTR_USE public virtual A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct E : public ATTR_USE virtual A {}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct F : virtual ATTR_USE public A {}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} } -namespace __arm_streaming ns_attr {}; // expected-error {{'__arm_streaming' cannot be applied to a declaration}} \ +namespace ATTR_USE ns_attr {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} \ expected-warning {{attributes on a namespace declaration are a C++17 extension}} diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 7f2b353ab18c0d009fdfb2041fffc69563ad5baa..703294ef13bb08b74603a29d34aa1443977ea75d 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -58,6 +58,10 @@ // CHECK-NOT: __ARM_FEATURE_SVE_BITS 512 // CHECK-NOT: __ARM_FEATURE_SVE_BITS 1024 // CHECK-NOT: 
__ARM_FEATURE_SVE_BITS 2048 +// CHECK: __ARM_STATE_ZA 1 +// CHECK: __ARM_STATE_ZT0 1 +// CHECK-NOT: __ARM_FEATURE_SME +// CHECK-NOT: __ARM_FEATURE_SME2 // RUN: %clang -target aarch64-none-elf -march=armv8-r -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-R-PROFILE // RUN: %clang -target arm64-none-linux-gnu -march=armv8-r -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-R-PROFILE @@ -616,3 +620,12 @@ // RUN: %clang --target=aarch64 -march=armv8.2-a+rcpc3 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RCPC3 %s // CHECK-RCPC3: __ARM_FEATURE_RCPC 3 + +// RUN: %clang --target=aarch64 -march=armv9-a+sme -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SME %s +// CHECK-SME: __ARM_FEATURE_LOCALLY_STREAMING 1 +// CHECK-SME: __ARM_FEATURE_SME 1 +// +// RUN: %clang --target=aarch64 -march=armv9-a+sme2 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SME2 %s +// CHECK-SME2: __ARM_FEATURE_LOCALLY_STREAMING 1 +// CHECK-SME2: __ARM_FEATURE_SME 1 +// CHECK-SME2: __ARM_FEATURE_SME2 1 diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c index 2b7cc57f230333386feb3231a0eb4718aacf94f5..6cc9cb22c7af2cc64f015bb2c365586d257197a0 100644 --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -32,6 +32,8 @@ // AARCH64-NEXT: #define __ARM_PCS_AAPCS64 1 // AARCH64-NEXT: #define __ARM_SIZEOF_MINIMAL_ENUM 4 // AARCH64-NEXT: #define __ARM_SIZEOF_WCHAR_T 4 +// AARCH64-NEXT: #define __ARM_STATE_ZA 1 +// AARCH64-NEXT: #define __ARM_STATE_ZT0 1 // AARCH64-NEXT: #define __ATOMIC_ACQUIRE 2 // AARCH64-NEXT: #define __ATOMIC_ACQ_REL 4 // AARCH64-NEXT: #define __ATOMIC_CONSUME 1 diff --git a/clang/test/Sema/MicrosoftExtensions.c b/clang/test/Sema/MicrosoftExtensions.c index 50077d90314886568f80a7c984a27db5d8cb47eb..cf7463d2e76ebc2d43ff2d7b874832a2b416cb90 100644 --- a/clang/test/Sema/MicrosoftExtensions.c +++ b/clang/test/Sema/MicrosoftExtensions.c @@ -123,7 +123,7 @@ struct __declspec(deprecated) DS1 { int i; float f; }; // expected-note {{'DS1' #define MY_TEXT "This is also deprecated" __declspec(deprecated(MY_TEXT)) void Dfunc1( void ) {} // expected-note {{'Dfunc1' has been explicitly marked deprecated here}} -struct __declspec(deprecated(123)) DS2 {}; // expected-error {{'deprecated' attribute requires a string}} +struct __declspec(deprecated(123)) DS2 {}; // expected-error {{expected string literal as argument of 'deprecated' attribute}} void test( void ) { e1 = one; // expected-warning {{'e1' is deprecated: This is deprecated}} diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c new file mode 100644 index 0000000000000000000000000000000000000000..079cff5a5bbae95d495c311dfaf59f95c59b3012 --- /dev/null +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -0,0 +1,110 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -target-feature +sme -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include "arm_neon.h" +#include "arm_sme.h" +#include "arm_sve.h" + +int16x8_t incompat_neon_sm(int16x8_t splat) __arm_streaming { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +__arm_locally_streaming int16x8_t incompat_neon_ls(int16x8_t splat) 
{ + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_inout("za") { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr); +} + +svuint32_t incompat_sve_sm(svbool_t pg, svuint32_t a, int16_t b) __arm_streaming { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +__arm_locally_streaming svuint32_t incompat_sve_ls(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +svuint32_t incompat_sve_smc(svbool_t pg, svuint32_t a, int64_t b) __arm_streaming_compatible { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +svuint32_t incompat_sve2_sm(svbool_t pg, svuint32_t a, int64_t b) __arm_streaming { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +__arm_locally_streaming svuint32_t incompat_sve2_ls(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +svuint32_t incompat_sve2_smc(svbool_t pg, svuint32_t a, int64_t b) __arm_streaming_compatible { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_inout("za") { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + svmops_za32_f32_m(0, pn, pm, zn, zm); +} + +svfloat64_t streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) __arm_streaming { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +__arm_locally_streaming svfloat64_t locally_streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +svfloat64_t streaming_compatible_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) __arm_streaming_compatible { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +svint16_t streaming_caller_sve2(svint16_t op1, svint16_t op2) __arm_streaming { + // expected-no-warning + return svmul_lane_s16(op1, op2, 0); +} + +__arm_locally_streaming svint16_t locally_streaming_caller_sve2(svint16_t op1, svint16_t op2) { + // expected-no-warning + return svmul_lane_s16(op1, op2, 0); +} + +svint16_t streaming_compatible_caller_sve2(svint16_t op1, svint16_t op2) __arm_streaming_compatible { + // 
expected-no-warning + return svmul_lane_s16(op1, op2, 0); } + +svbool_t streaming_caller_ptrue(void) __arm_streaming { + // expected-no-warning + return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4)); } + +svint8_t missing_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { + // expected-warning@+1 {{builtin call is not valid when calling from a function without active ZA state}} + return svread_hor_za8_s8_m(zd, pg, 0, slice_base); } + +__arm_new("za") +svint8_t new_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { + // expected-no-warning + return svread_hor_za8_s8_m(zd, pg, 0, slice_base); } diff --git a/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp b/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a54a94f408b79521cdef2ad9a9146b068a2f6de --- /dev/null +++ b/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -fsyntax-only -verify %s + +// This test checks the diagnostics that Clang emits when compiling without '+sme'. + +void streaming_compatible_def() __arm_streaming_compatible {} // OK +void streaming_def() __arm_streaming { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} +void shared_za_def() __arm_inout("za") { } // expected-error {{function using ZA state requires 'sme'}} +__arm_new("za") void new_za_def() { } // expected-error {{function using ZA state requires 'sme'}} +__arm_locally_streaming void locally_streaming_def() { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} +void streaming_shared_za_def() __arm_streaming __arm_inout("za") { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} + +// It should work fine when we explicitly add the target("sme") attribute. +__attribute__((target("sme"))) void streaming_compatible_def_sme_attr() __arm_streaming_compatible {} // OK +__attribute__((target("sme"))) void streaming_def_sme_attr() __arm_streaming { } // OK +__attribute__((target("sme"))) void shared_za_def_sme_attr() __arm_inout("za") { } // OK +__arm_new("za") __attribute__((target("sme"))) void new_za_def_sme_attr() {} // OK +__arm_locally_streaming __attribute__((target("sme"))) void locally_streaming_def_sme_attr() {} // OK + +// Test that it also works with the target("sme2") attribute. +__attribute__((target("sme2"))) void streaming_def_sme2_attr() __arm_streaming { } // OK + +// No code is generated for declarations, so it is fine to declare functions with these attributes.
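Put concretely (a minimal sketch of the rule above, assuming a compile without '+sme'; decl_only, def and caller are invented names): the attribute keywords are accepted on a bare declaration, and the diagnostics fire only where code would have to be generated, that is, at a definition or at a call site:

    void decl_only(void) __arm_streaming;  // OK: no code is generated for a declaration
    // void def(void) __arm_streaming {}   // error: function executed in streaming-SVE mode requires 'sme'
    void caller(void) {
      // decl_only();                      // error: call to a streaming function requires 'sme'
    }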
+void streaming_compatible_decl() __arm_streaming_compatible; // OK +void streaming_decl() __arm_streaming; // OK +void shared_za_decl() __arm_inout("za"); // OK + +void non_streaming_decl(); +void non_streaming_def(void (*streaming_fn_ptr)(void) __arm_streaming, + void (*streaming_compatible_fn_ptr)(void) __arm_streaming_compatible) { + streaming_compatible_decl(); // OK + streaming_compatible_fn_ptr(); // OK + streaming_decl(); // expected-error {{call to a streaming function requires 'sme'}} + streaming_fn_ptr(); // expected-error {{call to a streaming function requires 'sme'}} +} + +void streaming_compatible_def2(void (*streaming_fn_ptr)(void) __arm_streaming, + void (*streaming_compatible_fn_ptr)(void) __arm_streaming_compatible) + __arm_streaming_compatible { + non_streaming_decl(); // OK + streaming_compatible_decl(); // OK + streaming_compatible_fn_ptr(); // OK + streaming_decl(); // expected-error {{call to a streaming function requires 'sme'}} + streaming_fn_ptr(); // expected-error {{call to a streaming function requires 'sme'}} +} + +// Also test when call-site is not a function. +int streaming_decl_ret_int() __arm_streaming; +int x = streaming_decl_ret_int(); // expected-error {{call to a streaming function requires 'sme'}} diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c new file mode 100644 index 0000000000000000000000000000000000000000..b986b0b3de2e14d265ffe9883368a57e27056344 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-func-attrs.c @@ -0,0 +1,402 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify=expected-cpp -x c++ %s + +// Valid attributes + +void sme_arm_streaming(void) __arm_streaming; +void sme_arm_streaming_compatible(void) __arm_streaming_compatible; + +__arm_new("za") void sme_arm_new_za(void) {} +void sme_arm_shared_za(void) __arm_inout("za"); +void sme_arm_preserves_za(void) __arm_preserves("za"); + +__arm_new("za") void sme_arm_streaming_new_za(void) __arm_streaming {} +void sme_arm_streaming_shared_za(void) __arm_streaming __arm_inout("za"); +void sme_arm_streaming_preserves_za(void) __arm_streaming __arm_preserves("za"); + +__arm_new("za") void sme_arm_sc_new_za(void) __arm_streaming_compatible {} +void sme_arm_sc_shared_za(void) __arm_streaming_compatible __arm_inout("za"); +void sme_arm_sc_preserves_za(void) __arm_streaming_compatible __arm_preserves("za"); + +__arm_locally_streaming void sme_arm_locally_streaming(void) { } +__arm_locally_streaming void sme_arm_streaming_and_locally_streaming(void) __arm_streaming { } +__arm_locally_streaming void sme_arm_streaming_and_streaming_compatible(void) __arm_streaming_compatible { } + +__arm_locally_streaming __arm_new("za") void sme_arm_ls_new_za(void) { } +__arm_locally_streaming void sme_arm_ls_shared_za(void) __arm_inout("za") { } +__arm_locally_streaming void sme_arm_ls_preserves_za(void) __arm_preserves("za") { } + +// Valid attributes on function pointers + +void streaming_ptr(void) __arm_streaming; +typedef void (*fptrty1) (void) __arm_streaming; +fptrty1 call_streaming_func() { return streaming_ptr; } + +void streaming_compatible_ptr(void) __arm_streaming_compatible; +typedef void (*fptrty2) (void) __arm_streaming_compatible; +fptrty2 call_sc_func() { return streaming_compatible_ptr; } + +void shared_za_ptr(void) __arm_inout("za"); +typedef void (*fptrty3) (void) __arm_inout("za"); +fptrty3 call_shared_za_func() { 
return shared_za_ptr; } + +void preserves_za_ptr(void) __arm_preserves("za"); +typedef void (*fptrty4) (void) __arm_preserves("za"); +fptrty4 call_preserve_za_func() { return preserves_za_ptr; } + +typedef void (*fptrty6) (void); +fptrty6 cast_nza_func_to_normal() { return sme_arm_new_za; } +fptrty6 cast_ls_func_to_normal() { return sme_arm_locally_streaming; } + +// Invalid attributes + +// expected-cpp-error@+4 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void streaming_mode(void) __arm_streaming __arm_streaming_compatible; + +// expected-cpp-error@+4 {{'__arm_streaming' and '__arm_streaming_compatible' are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'__arm_streaming' and '__arm_streaming_compatible' are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void streaming_compatible(void) __arm_streaming_compatible __arm_streaming; + +// expected-cpp-error@+2 {{'__arm_new("za")' and '__arm_inout("za")' are not compatible}} +// expected-error@+1 {{'__arm_new("za")' and '__arm_inout("za")' are not compatible}} +__arm_new("za") void new_shared_za(void) __arm_inout("za") {} + +// expected-cpp-error@+2 {{'__arm_new("za")' and '__arm_preserves("za")' are not compatible}} +// expected-error@+1 {{'__arm_new("za")' and '__arm_preserves("za")' are not compatible}} +__arm_new("za") void new_preserves_za(void) __arm_preserves("za") {} + +// Invalid attributes on function pointers + +// expected-cpp-error@+4 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void streaming_ptr_invalid(void) __arm_streaming __arm_streaming_compatible; +// expected-cpp-error@+4 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'__arm_streaming_compatible' and '__arm_streaming' are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +typedef void (*fptrty7) (void) __arm_streaming __arm_streaming_compatible; +fptrty7 invalid_streaming_func() { return streaming_ptr_invalid; } + +// expected-warning@+2 {{'__arm_streaming' only applies to non-K&R-style functions}} +// expected-error@+1 {{'__arm_streaming' only applies to function types; type here is 'void ()'}} +void function_no_prototype() __arm_streaming; + +// +// Check for incorrect conversions of function pointers with the attributes +// + +typedef void (*n_ptrty) (void); +typedef void (*s_ptrty) (void) __arm_streaming; +s_ptrty return_valid_streaming_fptr(s_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 's_ptrty' (aka 'void (*)() __arm_streaming') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 's_ptrty' (aka 'void (*)(void) __arm_streaming')}} +s_ptrty return_invalid_fptr_streaming_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an 
lvalue of type 's_ptrty' (aka 'void (*)() __arm_streaming')}} +// expected-error@+1 {{incompatible function pointer types returning 's_ptrty' (aka 'void (*)(void) __arm_streaming') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_streaming(s_ptrty f) { return f; } + +// Test an instance where the result type is not a prototyped function, such that we still get a diagnostic. +typedef void (*nonproto_n_ptrty) (); +// expected-cpp-error@+2 {{cannot initialize return object of type 'nonproto_n_ptrty' (aka 'void (*)()') with an lvalue of type 's_ptrty' (aka 'void (*)() __arm_streaming')}} +// expected-error@+1 {{incompatible function pointer types returning 's_ptrty' (aka 'void (*)(void) __arm_streaming') from a function with result type 'nonproto_n_ptrty' (aka 'void (*)()')}} +nonproto_n_ptrty return_invalid_fptr_streaming_nonprotonormal(s_ptrty f) { return f; } + +typedef void (*sc_ptrty) (void) __arm_streaming_compatible; +sc_ptrty return_valid_streaming_compatible_fptr(sc_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 'sc_ptrty' (aka 'void (*)() __arm_streaming_compatible') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sc_ptrty' (aka 'void (*)(void) __arm_streaming_compatible')}} +sc_ptrty return_invalid_fptr_streaming_compatible_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sc_ptrty' (aka 'void (*)() __arm_streaming_compatible')}} +// expected-error@+1 {{incompatible function pointer types returning 'sc_ptrty' (aka 'void (*)(void) __arm_streaming_compatible') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_streaming_compatible(sc_ptrty f) { return f; } + +typedef void (*sz_ptrty) (void) __arm_inout("za"); +sz_ptrty return_valid_shared_za_fptr(sz_ptrty f) { return f; } + + +// expected-cpp-error@+2 {{cannot initialize return object of type 'sz_ptrty' (aka 'void (*)() __arm_inout("za")') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sz_ptrty' (aka 'void (*)(void) __arm_inout("za")')}} +sz_ptrty return_invalid_fptr_shared_za_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sz_ptrty' (aka 'void (*)() __arm_inout("za")')}} +// expected-error@+1 {{incompatible function pointer types returning 'sz_ptrty' (aka 'void (*)(void) __arm_inout("za")') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_shared_za(sz_ptrty f) { return f; } + +typedef void (*pz_ptrty) (void) __arm_preserves("za"); +pz_ptrty return_valid_preserves_za_fptr(pz_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 'pz_ptrty' (aka 'void (*)() __arm_preserves("za")') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'pz_ptrty' (aka 'void (*)(void) __arm_preserves("za")')}} +pz_ptrty return_invalid_fptr_preserves_za_normal(n_ptrty f) { return f; } +// 
expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'pz_ptrty' (aka 'void (*)() __arm_preserves("za")')}}
+// expected-error@+1 {{incompatible function pointer types returning 'pz_ptrty' (aka 'void (*)(void) __arm_preserves("za")') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}}
+n_ptrty return_invalid_fptr_normal_preserves_za(pz_ptrty f) { return f; }
+
+// Test template instantiations
+#ifdef __cplusplus
+template <typename T> T templated(T x) __arm_streaming { return x; }
+template <> int templated(int x) __arm_streaming { return x + 1; }
+template <> float templated(float x) __arm_streaming { return x + 2; }
+// expected-cpp-error@+2 {{explicit instantiation of 'templated' does not refer to a function template, variable template, member function, member class, or static data member}}
+// expected-cpp-note@-4 {{candidate template ignored: could not match 'short (short) __arm_streaming' against 'short (short)'}}
+template short templated(short);
+#endif
+
+// Conflicting attributes on redeclarations
+
+// expected-error@+5 {{function declared 'void (void) __arm_streaming_compatible' was previously declared 'void (void) __arm_streaming', which has different SME function attributes}}
+// expected-note@+3 {{previous declaration is here}}
+// expected-cpp-error@+3 {{function declared 'void () __arm_streaming_compatible' was previously declared 'void () __arm_streaming', which has different SME function attributes}}
+// expected-cpp-note@+1 {{previous declaration is here}}
+void redecl(void) __arm_streaming;
+void redecl(void) __arm_streaming_compatible { }
+
+// expected-error@+5 {{function declared 'void (void)' was previously declared 'void (void) __arm_preserves("za")', which has different SME function attributes}}
+// expected-note@+3 {{previous declaration is here}}
+// expected-cpp-error@+3 {{function declared 'void ()' was previously declared 'void () __arm_preserves("za")', which has different SME function attributes}}
+// expected-cpp-note@+1 {{previous declaration is here}}
+void redecl_preserve_za(void) __arm_preserves("za");
+void redecl_preserve_za(void) {}
+
+// expected-error@+5 {{function declared 'void (void) __arm_preserves("za")' was previously declared 'void (void)', which has different SME function attributes}}
+// expected-note@+3 {{previous declaration is here}}
+// expected-cpp-error@+3 {{function declared 'void () __arm_preserves("za")' was previously declared 'void ()', which has different SME function attributes}}
+// expected-cpp-note@+1 {{previous declaration is here}}
+void redecl_nopreserve_za(void);
+void redecl_nopreserve_za(void) __arm_preserves("za") {}
+
+void non_za_definition(void (*shared_za_fn_ptr)(void) __arm_inout("za"), void (*preserves_za_fn_ptr)(void) __arm_preserves("za")) {
+  sme_arm_new_za(); // OK
+  // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}}
+  // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}}
+  sme_arm_shared_za();
+  // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}}
+  // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}}
+  shared_za_fn_ptr();
+  // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}}
+  // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}}
+  preserves_za_fn_ptr();
+}
+
+void shared_za_definition(void (*shared_za_fn_ptr)(void) __arm_inout("za")) __arm_inout("za") {
+  sme_arm_shared_za(); // OK
+  shared_za_fn_ptr(); // OK
+}
+
+__arm_new("za") void new_za_definition(void (*shared_za_fn_ptr)(void) __arm_inout("za")) {
+  sme_arm_shared_za(); // OK
+  shared_za_fn_ptr(); // OK
+}
+
+#ifdef __cplusplus
+int shared_za_initializer(void) __arm_inout("za");
+// expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}}
+int global = shared_za_initializer();
+
+struct S {
+  virtual void shared_za_memberfn(void) __arm_inout("za");
+};
+
+struct S2 : public S {
+// expected-cpp-error@+2 {{virtual function 'shared_za_memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_inout("za")')}}
+// expected-cpp-note@-5 {{overridden virtual function is here}}
+  __arm_new("za") void shared_za_memberfn(void) override {}
+};
+
+// The '__arm_preserves("za")' property cannot be dropped when overriding a virtual
+// function. It is however fine for the overriding function to be '__arm_preserves("za")'
+// even though the function that it overrides is not.
+
+struct S_PreservesZA {
+  virtual void memberfn(void) __arm_preserves("za");
+};
+
+struct S_Drop_PreservesZA : S_PreservesZA {
+// expected-cpp-error@+2 {{virtual function 'memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_preserves("za")')}}
+// expected-cpp-note@-5 {{overridden virtual function is here}}
+  void memberfn(void) override {}
+};
+
+struct S_NoPreservesZA {
+  virtual void memberfn(void);
+};
+
+struct S_AddPreservesZA : S_NoPreservesZA {
+// expected-cpp-error@+2 {{virtual function 'memberfn' has different attributes ('void () __arm_preserves("za")') than the function it overrides (which has 'void ()')}}
+// expected-cpp-note@-5 {{overridden virtual function is here}}
+  void memberfn(void) __arm_preserves("za") override {}
+};
+
+
+// Check that the attribute propagates through template instantiations.
+template <typename T>
+struct S3 {
+  static constexpr int value = 0;
+};
+
+template <>
+struct S3<void (*)(void)> {
+  static constexpr int value = 1;
+};
+
+template <>
+struct S3<void (*)(void) __arm_streaming> {
+  static constexpr int value = 2;
+};
+
+template <>
+struct S3<void (*)(void) __arm_streaming_compatible> {
+  static constexpr int value = 4;
+};
+
+template <>
+struct S3<void (*)(void) __arm_inout("za")> {
+  static constexpr int value = 8;
+};
+
+template <>
+struct S3<void (*)(void) __arm_preserves("za")> {
+  static constexpr int value = 16;
+};
+
+void normal_func(void) {}
+void streaming_func(void) __arm_streaming {}
+void streaming_compatible_func(void) __arm_streaming_compatible {}
+void shared_za_func(void) __arm_inout("za") {}
+void preserves_za_func(void) __arm_preserves("za") {}
+
+static_assert(S3<decltype(&normal_func)>::value == 1, "why are we picking the wrong specialization?");
+static_assert(S3<decltype(&streaming_func)>::value == 2, "why are we picking the wrong specialization?");
+static_assert(S3<decltype(&streaming_compatible_func)>::value == 4, "why are we picking the wrong specialization?");
+static_assert(S3<decltype(&shared_za_func)>::value == 8, "why are we picking the wrong specialization?");
+static_assert(S3<decltype(&preserves_za_func)>::value == 16, "why are we picking the wrong specialization?");
+
+// Also test the attribute is propagated with variadic templates
+constexpr int eval_variadic_template() { return 0; }
+template <typename T, typename... Other>
+constexpr int eval_variadic_template(T f, Other... other) {
+  return S3<T>::value + eval_variadic_template(other...);
+}
+static_assert(eval_variadic_template(normal_func, streaming_func,
+                                     streaming_compatible_func,
+                                     shared_za_func, preserves_za_func) == 31,
+              "attributes not propagated properly in variadic template");
+
+// Test that the attribute is propagated with template specialization.
+template <typename T> int test_templated_f(T);
+template<> constexpr int test_templated_f(void(*)(void)) { return 1; }
+template<> constexpr int test_templated_f(void(*)(void)__arm_streaming) { return 2; }
+template<> constexpr int test_templated_f(void(*)(void)__arm_streaming_compatible) { return 4; }
+template<> constexpr int test_templated_f(void(*)(void)__arm_inout("za")) { return 8; }
+template<> constexpr int test_templated_f(void(*)(void)__arm_preserves("za")) { return 16; }
+
+static_assert(test_templated_f(&normal_func) == 1, "Instantiated to wrong function");
+static_assert(test_templated_f(&streaming_func) == 2, "Instantiated to wrong function");
+static_assert(test_templated_f(&streaming_compatible_func) == 4, "Instantiated to wrong function");
+static_assert(test_templated_f(&shared_za_func) == 8, "Instantiated to wrong function");
+static_assert(test_templated_f(&preserves_za_func) == 16, "Instantiated to wrong function");
+
+// expected-cpp-error@+2 {{'__arm_streaming' only applies to function types; type here is 'int'}}
+// expected-error@+1 {{'__arm_streaming' only applies to function types; type here is 'int'}}
+int invalid_type_for_attribute __arm_streaming;
+
+// Test overloads
+constexpr int overload(void f(void)) { return 1; }
+constexpr int overload(void f(void) __arm_streaming) { return 2; }
+constexpr int overload(void f(void) __arm_streaming_compatible) { return 4; }
+constexpr int overload(void f(void) __arm_inout("za")) { return 8; }
+constexpr int overload(void f(void) __arm_preserves("za")) { return 16; }
+static_assert(overload(&normal_func) == 1, "Overloaded to wrong function");
+static_assert(overload(&streaming_func) == 2, "Overloaded to wrong function");
+static_assert(overload(&streaming_compatible_func) == 4, "Overloaded to wrong function");
+static_assert(overload(&shared_za_func) == 8, "Overloaded to wrong function");
+static_assert(overload(&preserves_za_func) == 16, "Overloaded to wrong function");
+
+// Test implicit instantiation
+template <typename T> struct X {
+  static void foo(T) __arm_streaming { }
+};
+constexpr int overload_int(void f(int)) { return 1; }
+constexpr int overload_int(void f(int) __arm_streaming) { return 2; }
+constexpr X<int> *ptr = 0;
+static_assert(overload_int(ptr->foo) == 2, "Overloaded to the wrong function after implicit instantiation");
+
+#endif // ifdef __cplusplus
+
+// expected-cpp-error@+2 {{unknown state ''}}
+// expected-error@+1 {{unknown state ''}}
+__arm_new("") void invalid_arm_new_empty_string(void);
+// expected-cpp-error@+2 {{expected string literal as argument of '__arm_new' attribute}}
+// expected-error@+1 {{expected string literal as argument of '__arm_new' attribute}}
+__arm_new(0) void invalid_arm_new_non_literal_string(void);
+// expected-cpp-error@+2 {{unknown state 'unknownstate'}}
+// expected-error@+1 {{unknown state 'unknownstate'}}
+__arm_new("unknownstate") void invalid_arm_new_unknown_state(void);
+
+// expected-cpp-error@+2 {{unknown state ''}}
+// expected-error@+1 {{unknown state ''}}
+void invalid_arm_in_empty_string(void) __arm_in("");
+// expected-cpp-error@+2 {{expected string literal as argument of '__arm_in' attribute}}
+// expected-error@+1 {{expected string literal as
argument of '__arm_in' attribute}} +void invalid_arm_in_non_literal_string(void) __arm_in(0); +// expected-cpp-error@+2 {{unknown state 'unknownstate'}} +// expected-error@+1 {{unknown state 'unknownstate'}} +void invalid_arm_in_unknown_state(void) __arm_in("unknownstate"); + +void valid_state_attrs_in_in1(void) __arm_in("za"); +void valid_state_attrs_in_in2(void) __arm_in("za", "za"); + +// expected-cpp-error@+2 {{missing state for '__arm_in'}} +// expected-error@+1 {{missing state for '__arm_in'}} +void invalid_state_attrs_no_arg1(void) __arm_in(); +// expected-cpp-error@+2 {{missing state for '__arm_new'}} +// expected-error@+1 {{missing state for '__arm_new'}} +__arm_new() void invalid_state_attrs_no_arg2(void); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_out(void) __arm_in("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_inout(void) __arm_in("za") __arm_inout("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_preserves(void) __arm_in("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_in(void) __arm_out("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_inout(void) __arm_out("za") __arm_inout("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_preserves(void) __arm_out("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_in(void) __arm_inout("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_out(void) __arm_inout("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_preserves(void) __arm_inout("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_in(void) __arm_preserves("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_out(void) __arm_preserves("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_inout(void) __arm_preserves("za") __arm_inout("za"); diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp index c08f9f25eb473befb4f7e0b07b8b187013d5bfb6..8f3f98394492ddb30e3dcad1c81fe1fa43053ce2 100644 --- 
a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -10,114 +10,66 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif -#include <arm_sme_draft_spec_subject_to_change.h> +#include <arm_sme.h> -void test_range_0_0(svbool_t pg, void *ptr) { +void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, -1, 0, pg, ptr); + SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svst1_ver_za8,,,)(1, -1, 15, pg, ptr); + SVE_ACLE_FUNC(svst1_ver_za8,,,)(1, slice, pg, ptr); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svld1_hor_za128,,,)(0, -1, -1, pg, ptr); + SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(-1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svst1_ver_za128,,,)(15, -1, 1, pg, ptr); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(-1, -1, 0, pg, ptr, 1); - // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(1, -1, 15, pg, ptr, 1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(0, -1, -1, pg, ptr, 1); - // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(15, -1, 1, pg, ptr, 1); + SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, -1, -1, 0); - // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 1, -1, 15); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, 0, -1, -1); + SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, -1, slice); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 15, -1, 1); + SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 1, slice); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(-1, -1, 0, pg, svundef_s8()); + SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(-1, slice, pg, svundef_s8()); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, -1, 15, pg, svundef_s8()); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(0, -1, -1, pg, svundef_s8()); - // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} - SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(15, -1, 1, pg, svundef_s8()); + SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, slice, pg, svundef_s8()); } -void test_range_0_1(svbool_t pg, void *ptr) { - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, -1, 0, pg, ptr); - // 
expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svst1_ver_za16,,,)(2, -1, 7, pg, ptr); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svld1_hor_za64,,,)(0, -1, -1, pg, ptr); - // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svst1_ver_za64,,,)(7, -1, 2, pg, ptr); +void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(-1, -1, 0, pg, ptr, 1); + SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(2, -1, 7, pg, ptr, 1); + SVE_ACLE_FUNC(svst1_ver_za16,,,)(2, slice, pg, ptr); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(0, -1, -1, pg, ptr, 1); + SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(-1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(7, -1, 2, pg, ptr, 1); + SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(2, slice, pg, ptr, 1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, -1, -1, 0); + SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, -1, slice); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 2, -1, 7); + SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 2, slice); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, 0, -1, -1); + SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(-1, slice, pg, svundef_s16()); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 7, -1, 2); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(-1, -1, 0, pg, svundef_s16()); - // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, -1, 7, pg, svundef_s16()); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(0, -1, -1, pg, svundef_s64()); - // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} - SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(7, -1, 2, pg, svundef_s64()); + SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, slice, pg, svundef_s16()); } -void test_range_0_3(svbool_t pg, void *ptr) { - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, -1, 0, pg, ptr); - // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svst1_ver_za32,,,)(4, -1, 3, pg, ptr); +void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svld1_hor_za32,,,)(0, -1, -1, pg, ptr); + 
SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svst1_ver_za32,,,)(3, -1, 4, pg, ptr); + SVE_ACLE_FUNC(svst1_ver_za32,,,)(4, slice, pg, ptr); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(-1, -1, 0, pg, ptr, 1); + SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(-1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(4, -1, 3, pg, ptr, 1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(0, -1, -1, pg, ptr, 1); - // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(3, -1, 4, pg, ptr, 1); + SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(4, slice, pg, ptr, 1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, -1, -1, 0); - // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 4, -1, 3); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, 0, -1, -1); - // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 3, -1, 4); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(-1, -1, 0, pg, svundef_s32()); + SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, -1, slice); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(4, -1, 3, pg, svundef_s32()); + SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 4, slice); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, -1, -1, pg, svundef_s32()); + SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(-1, slice, pg, svundef_s32()); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} - SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(3, -1, 4, pg, svundef_s32()); + SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(4, slice, pg, svundef_s32()); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} SVE_ACLE_FUNC(svaddha_za32, _s32, _m,)(4, pg, pg, svundef_s32()); @@ -138,40 +90,24 @@ void test_range_0_3(svbool_t pg, void *ptr) { SVE_ACLE_FUNC(svusmops_za32, _u8, _m,)(-1, pg, pg, svundef_u8(), svundef_s8()); } -void test_range_0_7(svbool_t pg, void *ptr) { +void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, -1, 0, pg, ptr); + SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svst1_ver_za64,,,)(8, -1, 1, pg, ptr); + SVE_ACLE_FUNC(svst1_ver_za64,,,)(8, slice, pg, ptr); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svld1_hor_za16,,,)(0, -1, -1, pg, ptr); + 
SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(-1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svst1_ver_za16,,,)(1, -1, 8, pg, ptr); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(-1, -1, 0, pg, ptr, 1); - // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(8, -1, 1, pg, ptr, 1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(0, -1, -1, pg, ptr, 1); - // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(1, -1, 8, pg, ptr, 1); + SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(8, slice, pg, ptr, 1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, -1, -1, 0); - // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 8, -1, 1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, 0, -1, -1); + SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, -1, slice); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 1, -1, 8); + SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 8, slice); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(-1, -1, 0, pg, svundef_s64()); + SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(-1, slice, pg, svundef_s64()); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(8, -1, 1, pg, svundef_s64()); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(0, -1, -1, pg, svundef_s16()); - // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} - SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(1, -1, 8, pg, svundef_s16()); + SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(8, slice, pg, svundef_s64()); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} SVE_ACLE_FUNC(svaddha_za64, _s64, _m,)(8, pg, pg, svundef_s64()); @@ -190,71 +126,46 @@ void test_range_0_7(svbool_t pg, void *ptr) { SVE_ACLE_FUNC(svusmopa_za64, _u16, _m,)(8, pg, pg, svundef_u16(), svundef_s16()); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} SVE_ACLE_FUNC(svusmops_za64, _u16, _m,)(-1, pg, pg, svundef_u16(), svundef_s16()); + + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svmopa_za64, _f64, _m,)(8, pg, pg, svundef_f64(), svundef_f64()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svmops_za64, _f64, _m,)(-1, pg, pg, svundef_f64(), svundef_f64()); } -void test_range_0_15(svbool_t pg, void *ptr) { +void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, -1, 0, 
pg, ptr); + SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svst1_ver_za128,,,)(16, -1, 0, pg, ptr); + SVE_ACLE_FUNC(svst1_ver_za128,,,)(16, slice, pg, ptr); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svld1_hor_za8,,,)(0, -1, -1, pg, ptr); + SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(-1, slice, pg, ptr, 1); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svst1_ver_za8,,,)(0, -1, 16, pg, ptr); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(-1, -1, 0, pg, ptr, 1); - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, -1, 0, pg, ptr, 1); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(0, -1, -1, pg, ptr, 1); - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(0, -1, 16, pg, ptr, 1); - - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svldr_vnum_za,,,)(-1, 16, ptr); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svstr_vnum_za,,,)(-1, -1, ptr); + SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, slice, pg, ptr, 1); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, -1, 0); + SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, slice); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 16, -1, 0); + SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 16, slice); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, -1); + SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(-1, slice, pg, svundef_s8()); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, 16); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(-1, -1, 0, pg, svundef_s8()); - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, -1, 0, pg, svundef_s8()); - // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(0, -1, -1, pg, svundef_s8()); - // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} - SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(0, -1, 16, pg, svundef_s8()); + SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, slice, pg, svundef_s8()); } -void test_range_0_255(svbool_t pg, void *ptr) { +void test_range_0_255(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 256 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(256); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(-1); } -void 
test_constant(uint64_t u64, svbool_t pg, void *ptr) { - SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}} - SVE_ACLE_FUNC(svld1_ver_za16,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svld1_ver_za16' must be a constant integer}} - SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}} - SVE_ACLE_FUNC(svst1_ver_za64,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svst1_ver_za64' must be a constant integer}} - SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} - SVE_ACLE_FUNC(svld1_ver_vnum_za16,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svld1_ver_vnum_za16' must be a constant integer}} - SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}} - SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svst1_ver_vnum_za64' must be a constant integer}} - - SVE_ACLE_FUNC(svldr_vnum_za,,,)(u64, u64, ptr); // expected-error {{argument to 'svldr_vnum_za' must be a constant integer}} - SVE_ACLE_FUNC(svstr_vnum_za,,,)(u64, u64, ptr); // expected-error {{argument to 'svstr_vnum_za' must be a constant integer}} +void test_constant(uint64_t u64, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}} + SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}} + SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} + SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}} - SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, u64, u64); // expected-error-re {{argument to 'svread_hor_za8{{.*}}_m' must be a constant integer}} - SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}} - SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, u64, u64, pg, svundef_s32()); // expected-error-re {{argument to 'svwrite_hor_za32{{.*}}_m' must be a constant integer}} - SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}} + SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}} + SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}} } diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c index b384244ac6c6a9206ecca0874cb74dc22f4210fb..6af115beba8e49a6921d56db43be17d5c4e94682 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c @@ -3,23 +3,24 @@ // Test that functions with the correct target attributes can use the correct SME 
intrinsics. -#include <arm_sme_draft_spec_subject_to_change.h> +#include <arm_sme.h> __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) { - svld1_hor_za8(0, 0, 0, pg, ptr); +void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { + svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) { - svld1_hor_vnum_za32(0, 0, 0, pg, ptr, 0); +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { + svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) { - svst1_ver_za16(0, 0, 0, pg, ptr); +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { + svst1_ver_za16(0, 0, pg, ptr); } -void undefined(svbool_t pg, void *ptr) { - svst1_ver_vnum_za64(0, 0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +__attribute__((target("+sme"))) +void undefined(svbool_t pg, void *ptr) __arm_inout("za") { + svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}} } diff --git a/clang/test/Sema/annotate-type.c b/clang/test/Sema/annotate-type.c index 9fd95f953c5d48a9ff82d527031f29b132eb11eb..901cef7ffa8b3ada3b57558e96cc9ae08329a245 100644 --- a/clang/test/Sema/annotate-type.c +++ b/clang/test/Sema/annotate-type.c @@ -20,10 +20,10 @@ void foo(float *[[clang::annotate_type("foo")]] a) { [[clang::annotate_type("bar")]] int *z1; // expected-error {{'annotate_type' attribute cannot be applied to a declaration}} int *z2 [[clang::annotate_type("bar")]]; // expected-error {{'annotate_type' attribute cannot be applied to a declaration}} [[clang::annotate_type("bar")]]; // expected-error {{'annotate_type' attribute cannot be applied to a statement}} - int *[[clang::annotate_type(1)]] z3; // expected-error {{'annotate_type' attribute requires a string}} + int *[[clang::annotate_type(1)]] z3; // expected-error {{expected string literal as argument of 'annotate_type' attribute}} int *[[clang::annotate_type()]] z4; // expected-error {{'annotate_type' attribute takes at least 1 argument}} int *[[clang::annotate_type]] z5; // expected-error {{'annotate_type' attribute takes at least 1 argument}} - int *[[clang::annotate_type(some_function())]] z6; // expected-error {{'annotate_type' attribute requires a string}} + int *[[clang::annotate_type(some_function())]] z6; // expected-error {{expected string literal as argument of 'annotate_type' attribute}} int *[[clang::annotate_type("bar", some_function())]] z7; // expected-error {{'annotate_type' attribute requires parameter 1 to be a constant expression}} expected-note{{subexpression not valid in a constant expression}} int *[[clang::annotate_type("bar", z7)]] z8; // expected-error {{'annotate_type' attribute requires parameter 1 to be a constant expression}} expected-note{{subexpression not valid in a constant expression}} int *[[clang::annotate_type("bar", int)]] z9; // expected-error {{expected expression}} diff --git a/clang/test/Sema/annotate.c b/clang/test/Sema/annotate.c index 2e7a37936fedec7356e13ecb6eeab972ccfac226..b4551a102e6174f7b769a68e8813462eb84e7de5 100644 --- a/clang/test/Sema/annotate.c +++ b/clang/test/Sema/annotate.c @@ -3,8 +3,8 @@ void __attribute__((annotate("foo"))) foo(float *a) { __attribute__((annotate("bar"))) int x; [[clang::annotate("bar")]] int x2; - __attribute__((annotate(1))) int y; // expected-error {{'annotate' attribute requires a string}} - [[clang::annotate(1)]] int y2; // 
expected-error {{'annotate' attribute requires a string}} + __attribute__((annotate(1))) int y; // expected-error {{expected string literal as argument of 'annotate' attribute}} + [[clang::annotate(1)]] int y2; // expected-error {{expected string literal as argument of 'annotate' attribute}} __attribute__((annotate("bar", 1))) int z; [[clang::annotate("bar", 1)]] int z2; diff --git a/clang/test/Sema/attr-assume.c b/clang/test/Sema/attr-assume.c index cb07940d02b5b57166c7c0a467ad601463c1e3fb..98deffa3a746094cd0f3eaf0cf9d5b0c89d82051 100644 --- a/clang/test/Sema/attr-assume.c +++ b/clang/test/Sema/attr-assume.c @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -triple i386-apple-darwin9 -fsyntax-only -verify %s -void f1(void) __attribute__((assume(3))); // expected-error {{'assume' attribute requires a string}} -void f2(void) __attribute__((assume(int))); // expected-error {{expected expression}} -void f3(void) __attribute__((assume(for))); // expected-error {{expected expression}} +void f1(void) __attribute__((assume(3))); // expected-error {{expected string literal as argument of 'assume' attribute}} +void f2(void) __attribute__((assume(int))); // expected-error {{expected string literal as argument of 'assume' attribute}} +void f3(void) __attribute__((assume(for))); // expected-error {{expected string literal as argument of 'assume' attribute}} void f4(void) __attribute__((assume("QQQQ"))); // expected-warning {{unknown assumption string 'QQQQ'; attribute is potentially ignored}} void f5(void) __attribute__((assume("omp_no_openmp"))); void f6(void) __attribute__((assume("omp_noopenmp"))); // expected-warning {{unknown assumption string 'omp_noopenmp' may be misspelled; attribute is potentially ignored, did you mean 'omp_no_openmp'?}} @@ -10,5 +10,5 @@ void f7(void) __attribute__((assume("omp_no_openmp_routine"))); // expected-warn void f8(void) __attribute__((assume("omp_no_openmp1"))); // expected-warning {{unknown assumption string 'omp_no_openmp1' may be misspelled; attribute is potentially ignored, did you mean 'omp_no_openmp'?}} void f9(void) __attribute__((assume("omp_no_openmp", "omp_no_openmp"))); // expected-error {{'assume' attribute takes one argument}} -int g1 __attribute__((assume(0))); // expected-warning {{'assume' attribute only applies to functions and Objective-C methods}} +int g1 __attribute__((assume(0))); // expected-error {{expected string literal as argument of 'assume' attribute}} int g2 __attribute__((assume("omp_no_openmp"))); // expected-warning {{'assume' attribute only applies to functions and Objective-C methods}} diff --git a/clang/test/Sema/attr-btf_tag.c b/clang/test/Sema/attr-btf_tag.c index d33ccdf962825f9920a6b0db02074370f91df826..cbb21a5a88bb68b90db61803503dfb317838d417 100644 --- a/clang/test/Sema/attr-btf_tag.c +++ b/clang/test/Sema/attr-btf_tag.c @@ -21,7 +21,7 @@ struct __tag2 __tag3 t2 { int g1 __tag1; int g2 __tag_no_arg; // expected-error {{'btf_decl_tag' attribute takes one argument}} int g3 __tag_2_arg; // expected-error {{'btf_decl_tag' attribute takes one argument}} -int i1 __invalid; // expected-error {{'btf_decl_tag' attribute requires a string}} +int i1 __invalid; // expected-error {{expected string literal as argument of 'btf_decl_tag' attribute}} enum e1 { E1 diff --git a/clang/test/Sema/attr-btf_type_tag.c b/clang/test/Sema/attr-btf_type_tag.c index a3ef404f38238b35ceff407edaaed5822b8b4f81..aa3230a3e9b7f314db94855be9f2737763be2726 100644 --- a/clang/test/Sema/attr-btf_type_tag.c +++ b/clang/test/Sema/attr-btf_type_tag.c @@ -8,7 +8,7 @@ #define __tag6 
__attribute__((btf_type_tag("tag6"))) int __attribute__((btf_type_tag("tag1", "tag2"))) *invalid1; // expected-error {{'btf_type_tag' attribute takes one argument}} -int __attribute__((btf_type_tag(2))) *invalid2; // expected-error {{'btf_type_tag' attribute requires a string}} +int __attribute__((btf_type_tag(2))) *invalid2; // expected-error {{expected string literal as argument of 'btf_type_tag' attribute}} int * __tag1 __tag2 * __tag3 __tag4 * __tag5 __tag6 *g; diff --git a/clang/test/Sema/attr-capabilities.c b/clang/test/Sema/attr-capabilities.c index b28c437935a130e0a0ec011f5f0d3e4aa4aec707..5138803bd5eb7ad74c85b7ff0ca1ee2505114bad 100644 --- a/clang/test/Sema/attr-capabilities.c +++ b/clang/test/Sema/attr-capabilities.c @@ -17,8 +17,8 @@ int Test3 __attribute__((acquire_capability("test3"))); // expected-warning {{' int Test4 __attribute__((try_acquire_capability("test4"))); // expected-error {{'try_acquire_capability' attribute only applies to functions}} int Test5 __attribute__((release_capability("test5"))); // expected-warning {{'release_capability' attribute only applies to functions}} -struct __attribute__((capability(12))) Test3 {}; // expected-error {{'capability' attribute requires a string}} -struct __attribute__((shared_capability(Test2))) Test4 {}; // expected-error {{'shared_capability' attribute requires a string}} +struct __attribute__((capability(12))) Test3 {}; // expected-error {{expected string literal as argument of 'capability' attribute}} +struct __attribute__((shared_capability(Test2))) Test4 {}; // expected-error {{expected string literal as argument of 'shared_capability' attribute}} struct __attribute__((capability)) Test5 {}; // expected-error {{'capability' attribute takes one argument}} struct __attribute__((shared_capability("test1", 12))) Test6 {}; // expected-error {{'shared_capability' attribute takes one argument}} diff --git a/clang/test/Sema/attr-enforce-tcb-errors.cpp b/clang/test/Sema/attr-enforce-tcb-errors.cpp index 1ce147ab32df9287ba8b965c7b1cdf1908b4a4a2..b5effe2fe51809867045cfe138bee9c57f513a83 100644 --- a/clang/test/Sema/attr-enforce-tcb-errors.cpp +++ b/clang/test/Sema/attr-enforce-tcb-errors.cpp @@ -6,14 +6,14 @@ void no_arguments() __attribute__((enforce_tcb)); // expected-error{{'enforce_tc void too_many_arguments() __attribute__((enforce_tcb("test", 12))); // expected-error{{'enforce_tcb' attribute takes one argument}} -void wrong_argument_type() __attribute__((enforce_tcb(12))); // expected-error{{'enforce_tcb' attribute requires a string}} +void wrong_argument_type() __attribute__((enforce_tcb(12))); // expected-error{{expected string literal as argument of 'enforce_tcb' attribute}} [[clang::enforce_tcb_leaf("oops")]] int wrong_subject_type_leaf; // expected-warning{{'enforce_tcb_leaf' attribute only applies to functions}} void no_arguments_leaf() __attribute__((enforce_tcb_leaf)); // expected-error{{'enforce_tcb_leaf' attribute takes one argument}} void too_many_arguments_leaf() __attribute__((enforce_tcb_leaf("test", 12))); // expected-error{{'enforce_tcb_leaf' attribute takes one argument}} -void wrong_argument_type_leaf() __attribute__((enforce_tcb_leaf(12))); // expected-error{{'enforce_tcb_leaf' attribute requires a string}} +void wrong_argument_type_leaf() __attribute__((enforce_tcb_leaf(12))); // expected-error{{expected string literal as argument of 'enforce_tcb_leaf' attribute}} void foo(); diff --git a/clang/test/Sema/attr-enforce-tcb-errors.m b/clang/test/Sema/attr-enforce-tcb-errors.m index 
f82d38c919d1d4122cb5956de82fd0cd13857413..c8d0716553cc6aad9fe3cdf8bca4c4e39c093dc6 100644 --- a/clang/test/Sema/attr-enforce-tcb-errors.m +++ b/clang/test/Sema/attr-enforce-tcb-errors.m @@ -20,13 +20,13 @@ __attribute__((objc_root_class)) - (void)tooManyArguments __attribute__((enforce_tcb("test", 12))); // expected-error{{'enforce_tcb' attribute takes one argument}} -- (void)wrongArgumentType __attribute__((enforce_tcb(12))); // expected-error{{'enforce_tcb' attribute requires a string}} +- (void)wrongArgumentType __attribute__((enforce_tcb(12))); // expected-error{{expected string literal as argument of 'enforce_tcb' attribute}} - (void)noArgumentsLeaf __attribute__((enforce_tcb_leaf)); // expected-error{{'enforce_tcb_leaf' attribute takes one argument}} - (void)tooManyArgumentsLeaf __attribute__((enforce_tcb_leaf("test", 12))); // expected-error{{'enforce_tcb_leaf' attribute takes one argument}} -- (void)wrongArgumentTypeLeaf __attribute__((enforce_tcb_leaf(12))); // expected-error{{'enforce_tcb_leaf' attribute requires a string}} +- (void)wrongArgumentTypeLeaf __attribute__((enforce_tcb_leaf(12))); // expected-error{{expected string literal as argument of 'enforce_tcb_leaf' attribute}} @end @implementation AClass diff --git a/clang/test/Sema/attr-error.c b/clang/test/Sema/attr-error.c index 581bfc43cbc06b1c6ff8292cea7f185551a8b911..9f500a87be14c6e1fec2e8c4368d93f2bdbc3eb1 100644 --- a/clang/test/Sema/attr-error.c +++ b/clang/test/Sema/attr-error.c @@ -15,7 +15,7 @@ int bad2(void) { __attribute__((error("bad2"))); // expected-error {{'error' attribute cannot be applied to a statement}} } -__attribute__((error(3))) // expected-error {{'error' attribute requires a string}} +__attribute__((error(3))) // expected-error {{expected string literal as argument of 'error' attribute}} int bad3(void); diff --git a/clang/test/Sema/attr-handles.cpp b/clang/test/Sema/attr-handles.cpp index 135467b6c0a8c2fc169914f1a582b81802066b96..ff1c1f68dfec8ef93763b312f0763238d47bb2d4 100644 --- a/clang/test/Sema/attr-handles.cpp +++ b/clang/test/Sema/attr-handles.cpp @@ -6,7 +6,7 @@ void (*fp)(int handle [[clang::use_handle("Fuchsia")]]); auto lambda = [](int handle [[clang::use_handle("Fuchsia")]]){}; void g(int a __attribute__((acquire_handle("Fuchsia")))); // expected-error {{attribute only applies to output parameters}} void h(int *a __attribute__((acquire_handle))); // expected-error {{'acquire_handle' attribute takes one argument}} -void h(int *a __attribute__((acquire_handle(1)))); // expected-error {{attribute requires a string}} +void h(int *a __attribute__((acquire_handle(1)))); // expected-error {{expected string literal as argument of 'acquire_handle' attribute}} void h(int *a __attribute__((acquire_handle("RandomString", "AndAnother")))); // expected-error {{'acquire_handle' attribute takes one argument}} __attribute__((release_handle("Fuchsia"))) int i(); // expected-warning {{'release_handle' attribute only applies to parameters}} __attribute__((use_handle("Fuchsia"))) int j(); // expected-warning {{'use_handle' attribute only applies to parameters}} diff --git a/clang/test/Sema/attr-section.c b/clang/test/Sema/attr-section.c index 3ea922c91947e38520ed0d03f7bb769e5cf9595a..1f058c24f980fdbdbb6237519395dba803bc25fe 100644 --- a/clang/test/Sema/attr-section.c +++ b/clang/test/Sema/attr-section.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -verify -fsyntax-only -triple x86_64-apple-darwin9 %s int x __attribute__((section( - 42))); // expected-error {{'section' attribute requires a string}} + 42))); // 
expected-error {{expected string literal as argument of 'section' attribute}} // rdar://4341926 diff --git a/clang/test/Sema/attr-tls_model.c b/clang/test/Sema/attr-tls_model.c index 9106d39b55a3af11d162202e530ce0001b5a0092..ec99c7aacaa3a881948a2114583d81cf73a6ab73 100644 --- a/clang/test/Sema/attr-tls_model.c +++ b/clang/test/Sema/attr-tls_model.c @@ -10,5 +10,5 @@ int x __attribute((tls_model("global-dynamic"))); // expected-error {{'tls_model static __thread int y __attribute((tls_model("global-dynamic"))); // no-warning static __thread int y __attribute((tls_model("local", "dynamic"))); // expected-error {{'tls_model' attribute takes one argument}} -static __thread int y __attribute((tls_model(123))); // expected-error {{'tls_model' attribute requires a string}} +static __thread int y __attribute((tls_model(123))); // expected-error {{expected string literal as argument of 'tls_model' attribute}} static __thread int y __attribute((tls_model("foobar"))); // expected-error {{tls_model must be "global-dynamic", "local-dynamic", "initial-exec" or "local-exec"}} diff --git a/clang/test/Sema/attr-unavailable-message.c b/clang/test/Sema/attr-unavailable-message.c index b4ae70e0a39cba340d3c0f6a620c461bc8ad9589..0caa943ad8a42f8e439e53e141fd711549ffd466 100644 --- a/clang/test/Sema/attr-unavailable-message.c +++ b/clang/test/Sema/attr-unavailable-message.c @@ -8,7 +8,7 @@ double dfoo(double) __attribute__((__unavailable__("NO LONGER"))); // expected- void bar(void) __attribute__((__unavailable__)); // expected-note {{explicitly marked unavailable}} -int quux(void) __attribute__((__unavailable__(12))); // expected-error {{'__unavailable__' attribute requires a string}} +int quux(void) __attribute__((__unavailable__(12))); // expected-error {{expected string literal as argument of '__unavailable__' attribute}} #define ACCEPTABLE "Use something else" int quux2(void) __attribute__((__unavailable__(ACCEPTABLE))); diff --git a/clang/test/Sema/attr-warning.c b/clang/test/Sema/attr-warning.c index 0973f3c8eb4bd76b8bb5ec3a59b233e2e2436d6a..7510e88d291b1abf04ffd996b19738beff6ba43d 100644 --- a/clang/test/Sema/attr-warning.c +++ b/clang/test/Sema/attr-warning.c @@ -15,7 +15,7 @@ int bad2(void) { __attribute__((warning("bad2"))); // expected-error {{'warning' attribute cannot be applied to a statement}} } -__attribute__((warning(3))) // expected-error {{'warning' attribute requires a string}} +__attribute__((warning(3))) // expected-error {{expected string literal as argument of 'warning' attribute}} int bad3(void); diff --git a/clang/test/Sema/diagnose_if.c b/clang/test/Sema/diagnose_if.c index 6297545781f90d31cb50b3665eb720fc2a552918..4df39916c031e30bb13ea419596f53937eb0873f 100644 --- a/clang/test/Sema/diagnose_if.c +++ b/clang/test/Sema/diagnose_if.c @@ -6,7 +6,7 @@ void failure1(void) _diagnose_if(); // expected-error{{exactly 3 arguments}} void failure2(void) _diagnose_if(0); // expected-error{{exactly 3 arguments}} void failure3(void) _diagnose_if(0, ""); // expected-error{{exactly 3 arguments}} void failure4(void) _diagnose_if(0, "", "error", 1); // expected-error{{exactly 3 arguments}} -void failure5(void) _diagnose_if(0, 0, "error"); // expected-error{{requires a string}} +void failure5(void) _diagnose_if(0, 0, "error"); // expected-error{{expected string literal as argument of 'diagnose_if' attribute}} void failure6(void) _diagnose_if(0, "", "invalid"); // expected-error{{invalid diagnostic type for 'diagnose_if'; use "error" or "warning" instead}} void failure7(void) _diagnose_if(0, "", 
"ERROR"); // expected-error{{invalid diagnostic type}} void failure8(int a) _diagnose_if(a, "", ""); // expected-error{{invalid diagnostic type}} diff --git a/clang/test/Sema/enable_if.c b/clang/test/Sema/enable_if.c index 22eb84fa7275c0572abe9ecf51f3e7cd5631d53d..9d46c71274d69223d872b36467125bf49931f59a 100644 --- a/clang/test/Sema/enable_if.c +++ b/clang/test/Sema/enable_if.c @@ -105,7 +105,7 @@ __attribute__((enable_if(n == 0, "chosen when 'n' is zero"))) void f1(int n); // int n __attribute__((enable_if(1, "always chosen"))); // expected-warning{{'enable_if' attribute only applies to functions}} -void f(int n) __attribute__((enable_if("chosen when 'n' is zero", n == 0))); // expected-error{{'enable_if' attribute requires a string}} +void f(int n) __attribute__((enable_if("chosen when 'n' is zero", n == 0))); // expected-error{{expected string literal as argument of 'enable_if' attribute}} void f(int n) __attribute__((enable_if())); // expected-error{{'enable_if' attribute requires exactly 2 arguments}} diff --git a/clang/test/SemaCXX/attr-deprecated-replacement-error.cpp b/clang/test/SemaCXX/attr-deprecated-replacement-error.cpp index 54d0f9e74f3403ada41f8e64c1d4e9c97775cd29..8c9d9b29c5535275f0bc8397fe18cbcf568b08c8 100644 --- a/clang/test/SemaCXX/attr-deprecated-replacement-error.cpp +++ b/clang/test/SemaCXX/attr-deprecated-replacement-error.cpp @@ -5,13 +5,13 @@ #endif int a1 [[deprecated("warning", "fixit")]]; // expected-error{{'deprecated' attribute takes no more than 1 argument}} -int a2 [[deprecated("warning", 1)]]; // expected-error{{'deprecated' attribute takes no more than 1 argument}} +int a2 [[deprecated("warning", 1)]]; // expected-error{{expected string literal as argument of 'deprecated' attribute}} int b1 [[gnu::deprecated("warning", "fixit")]]; // expected-error{{'deprecated' attribute takes no more than 1 argument}} -int b2 [[gnu::deprecated("warning", 1)]]; // expected-error{{'deprecated' attribute takes no more than 1 argument}} +int b2 [[gnu::deprecated("warning", 1)]]; // expected-error{{expected string literal as argument of 'deprecated' attribute}} __declspec(deprecated("warning", "fixit")) int c1; // expected-error{{'deprecated' attribute takes no more than 1 argument}} -__declspec(deprecated("warning", 1)) int c2; // expected-error{{'deprecated' attribute takes no more than 1 argument}} +__declspec(deprecated("warning", 1)) int c2; // expected-error{{expected string literal as argument of 'deprecated' attribute}} int d1 __attribute__((deprecated("warning", "fixit"))); -int d2 __attribute__((deprecated("warning", 1))); // expected-error{{'deprecated' attribute requires a string}} +int d2 __attribute__((deprecated("warning", 1))); // expected-error{{expected string literal as argument of 'deprecated' attribute}} diff --git a/clang/test/SemaCXX/attr-no-sanitize.cpp b/clang/test/SemaCXX/attr-no-sanitize.cpp index 9e13fd3c02702e6bd2a3e21c16568c10b965f3ae..a464947fe5a3469964c9eb69b82a9a4e4b27a21f 100644 --- a/clang/test/SemaCXX/attr-no-sanitize.cpp +++ b/clang/test/SemaCXX/attr-no-sanitize.cpp @@ -4,7 +4,7 @@ int f1() __attribute__((no_sanitize)); // expected-error{{'no_sanitize' attribute takes at least 1 argument}} -int f2() __attribute__((no_sanitize(1))); // expected-error{{'no_sanitize' attribute requires a string}} +int f2() __attribute__((no_sanitize(1))); // expected-error{{expected string literal as argument of 'no_sanitize' attribute}} __attribute__((no_sanitize("all"))) int global; // expected-warning{{'no_sanitize' attribute argument 'all' not supported 
on a global variable}} __attribute__((no_sanitize("unknown"))) int global2; // expected-warning{{unknown sanitizer 'unknown' ignored}} diff --git a/clang/test/SemaCXX/attr-section.cpp b/clang/test/SemaCXX/attr-section.cpp index 12c0da283997df783cf33daa42eadab337ce32f9..62b597b4ff36f675c763a9af020b5b658aecfd79 100644 --- a/clang/test/SemaCXX/attr-section.cpp +++ b/clang/test/SemaCXX/attr-section.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -verify -fsyntax-only -triple x86_64-linux-gnu %s int x __attribute__((section( - 42))); // expected-error {{'section' attribute requires a string}} + 42))); // expected-error {{expected string literal as argument of 'section' attribute}} // PR6007 diff --git a/clang/test/SemaCXX/attr-weakref.cpp b/clang/test/SemaCXX/attr-weakref.cpp index 46ca5ab20682037e1952f39665a314a4420a1c84..4e1834096ebedefb6e18dc2225c63a26e164f4e8 100644 --- a/clang/test/SemaCXX/attr-weakref.cpp +++ b/clang/test/SemaCXX/attr-weakref.cpp @@ -33,6 +33,6 @@ int a9 __attribute__((weakref)); // expected-error {{weakref declaration of 'a9 static int a10(); int a10() __attribute__((weakref ("foo"))); -static int v __attribute__((weakref(a1), alias("foo"))); // expected-error {{'weakref' attribute requires a string}} +static int v __attribute__((weakref(a1), alias("foo"))); // expected-error {{expected string literal as argument of 'weakref' attribute}} __attribute__((weakref ("foo"))) auto a11 = 1; // expected-error {{weakref declaration must have internal linkage}} diff --git a/clang/test/SemaCXX/suppress.cpp b/clang/test/SemaCXX/suppress.cpp index d88ae0bbca00f9526b7c6abe1209c06521f4c4ed..29544b3c573ccc8695589605675ad1f54d33a75b 100644 --- a/clang/test/SemaCXX/suppress.cpp +++ b/clang/test/SemaCXX/suppress.cpp @@ -16,7 +16,7 @@ void f_() { [[gsl::suppress]] int x; // expected-error {{'suppress' attribute takes at least 1 argument}} [[gsl::suppress()]] int y; // expected-error {{'suppress' attribute takes at least 1 argument}} int [[gsl::suppress("r")]] z; // expected-error {{'suppress' attribute cannot be applied to types}} - [[gsl::suppress(f_)]] float f; // expected-error {{'suppress' attribute requires a string}} + [[gsl::suppress(f_)]] float f; // expected-error {{expected string literal as argument of 'suppress' attribute}} } union [[gsl::suppress("type.1")]] U { diff --git a/clang/test/SemaObjC/attr-swift_bridge.m b/clang/test/SemaObjC/attr-swift_bridge.m index 8f53ed8154588ca720f4714f471a0008742e42d9..0464e3f326e0abec946310a3f3d6e28cb7e61f41 100644 --- a/clang/test/SemaObjC/attr-swift_bridge.m +++ b/clang/test/SemaObjC/attr-swift_bridge.m @@ -5,7 +5,7 @@ __attribute__((__swift_bridge__)) @interface I @end -// expected-error@+1 {{'__swift_bridge__' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument of '__swift_bridge__' attribute}} __attribute__((__swift_bridge__(1))) @interface J @end diff --git a/clang/test/SemaObjC/objc-asm-attribute-neg-test.m b/clang/test/SemaObjC/objc-asm-attribute-neg-test.m index 9941189357ba1efa37983962339c72bcfaa5ba1d..ac3871969fe21a3c5213694e776ae7fa86c87e86 100644 --- a/clang/test/SemaObjC/objc-asm-attribute-neg-test.m +++ b/clang/test/SemaObjC/objc-asm-attribute-neg-test.m @@ -5,7 +5,7 @@ __attribute__((objc_runtime_name)) // expected-error {{'objc_runtime_name' attri @interface BInterface @end -__attribute__((objc_runtime_name(123))) // expected-error {{'objc_runtime_name' attribute requires a string}} +__attribute__((objc_runtime_name(123))) // expected-error {{expected string literal as argument of
'objc_runtime_name' attribute}} @protocol BProtocol1 @end @@ -14,7 +14,7 @@ __attribute__((objc_runtime_name("MySecretNamespace.Protocol"))) @end __attribute__((objc_runtime_name("MySecretNamespace.Message"))) -@interface Message { +@interface Message { __attribute__((objc_runtime_name("MySecretNamespace.Message"))) // expected-error {{'objc_runtime_name' attribute only applies to Objective-C interfaces and Objective-C protocols}} id MyIVAR; } diff --git a/clang/test/SemaObjC/validate-attr-swift_attr.m b/clang/test/SemaObjC/validate-attr-swift_attr.m index 4ff434d179725d1bd21f67fda1013d4249caf495..2c73b0a892722c16e34da578bdfaafdeb06faeda 100644 --- a/clang/test/SemaObjC/validate-attr-swift_attr.m +++ b/clang/test/SemaObjC/validate-attr-swift_attr.m @@ -5,7 +5,7 @@ __attribute__((swift_attr)) @interface I @end -// expected-error@+1 {{'swift_attr' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument of 'swift_attr' attribute}} __attribute__((swift_attr(1))) @interface J @end diff --git a/clang/test/SemaTemplate/attributes.cpp b/clang/test/SemaTemplate/attributes.cpp index a7081e83470aa13bd448fd637bfb98f3809d0a85..9fd448a5e9353e19335200f70ccb1446cf0fc4cd 100644 --- a/clang/test/SemaTemplate/attributes.cpp +++ b/clang/test/SemaTemplate/attributes.cpp @@ -25,7 +25,7 @@ namespace attribute_aligned { { __attribute__((aligned(Align))) char storage[Size]; }; - + template class C { public: @@ -95,11 +95,11 @@ void UseAnnotations() { HasAnnotations(); } template [[clang::annotate("ANNOTATE_BAZ", Is...)]] void HasPackAnnotations(); void UsePackAnnotations() { HasPackAnnotations<1, 2, 3>(); } -template [[clang::annotate(Is...)]] void HasOnlyPackAnnotation() {} // expected-error {{'annotate' attribute takes at least 1 argument}} expected-error {{'annotate' attribute requires a string}} +template [[clang::annotate(Is...)]] void HasOnlyPackAnnotation() {} // expected-error {{expected string literal as argument of 'annotate' attribute}} void UseOnlyPackAnnotations() { - HasOnlyPackAnnotation<>(); // expected-note {{in instantiation of function template specialization 'attribute_annotate::HasOnlyPackAnnotation<>' requested here}} - HasOnlyPackAnnotation<1>(); // expected-note {{in instantiation of function template specialization 'attribute_annotate::HasOnlyPackAnnotation<1>' requested here}} + HasOnlyPackAnnotation<>(); + HasOnlyPackAnnotation<1>(); } // CHECK: ClassTemplateDecl {{.*}} AnnotatedPackTemplateStruct @@ -276,40 +276,21 @@ void UseOnlyPackAnnotations() { // CHECK-NEXT: value: Int 6 // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6 // CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct AnnotatedPackTemplateStruct -// CHECK-NEXT: ClassTemplatePartialSpecializationDecl {{.*}} struct AnnotatedPackTemplateStruct definition -// CHECK-NEXT: DefinitionData -// CHECK-NEXT: DefaultConstructor -// CHECK-NEXT: CopyConstructor -// CHECK-NEXT: MoveConstructor -// CHECK-NEXT: CopyAssignment -// CHECK-NEXT: MoveAssignment -// CHECK-NEXT: Destructor -// CHECK-NEXT: TemplateArgument{{.*}} type 'char' -// CHECK-NEXT: BuiltinType {{.*}} 'char' -// CHECK-NEXT: TemplateArgument{{.*}} pack -// CHECK-NEXT: TemplateArgument{{.*}} expr -// CHECK-NEXT: PackExpansionExpr {{.*}} 'int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'Is' 'int' -// CHECK-NEXT: NonTypeTemplateParmDecl {{.*}} referenced 'int' depth 0 index 0 ... 
Is -// CHECK-NEXT: AnnotateAttr {{.*}} "" -// CHECK-NEXT: PackExpansionExpr {{.*}} '' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'Is' 'int' -// CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct AnnotatedPackTemplateStruct template struct [[clang::annotate("ANNOTATE_FOZ", Is...)]] AnnotatedPackTemplateStruct{}; template struct [[clang::annotate("ANNOTATE_BOO", Is...)]] AnnotatedPackTemplateStruct{}; template struct [[clang::annotate("ANNOTATE_FOZ", 4, 5, 6)]] AnnotatedPackTemplateStruct{}; -template struct [[clang::annotate(Is...)]] AnnotatedPackTemplateStruct{}; // expected-error {{'annotate' attribute requires a string}} expected-error {{'annotate' attribute takes at least 1 argument}} +template struct [[clang::annotate(Is...)]] AnnotatedPackTemplateStruct{}; // expected-error {{expected string literal as argument of 'annotate' attribute}} void UseAnnotatedPackTemplateStructSpecializations() { AnnotatedPackTemplateStruct Instance1{}; AnnotatedPackTemplateStruct Instance2{}; AnnotatedPackTemplateStruct Instance3{}; - AnnotatedPackTemplateStruct Instance4{}; // expected-note {{in instantiation of template class 'attribute_annotate::AnnotatedPackTemplateStruct' requested here}} - AnnotatedPackTemplateStruct Instance5{}; // expected-note {{in instantiation of template class 'attribute_annotate::AnnotatedPackTemplateStruct' requested here}} + AnnotatedPackTemplateStruct Instance4{}; + AnnotatedPackTemplateStruct Instance5{}; } // CHECK: ClassTemplateDecl {{.*}} InvalidAnnotatedPackTemplateStruct // CHECK-NEXT: TemplateTypeParmDecl {{.*}} typename depth 0 index 0 T -// CHECK-NEXT: NonTypeTemplateParmDecl {{.*}} referenced 'int' depth 0 index 1 ... Is +// CHECK-NEXT: NonTypeTemplateParmDecl {{.*}} 'int' depth 0 index 1 ... Is // CHECK-NEXT: CXXRecordDecl {{.*}} struct InvalidAnnotatedPackTemplateStruct definition // CHECK-NEXT: DefinitionData // CHECK-NEXT: DefaultConstructor @@ -318,9 +299,6 @@ void UseAnnotatedPackTemplateStructSpecializations() { // CHECK-NEXT: CopyAssignment // CHECK-NEXT: MoveAssignment // CHECK-NEXT: Destructor -// CHECK-NEXT: AnnotateAttr {{.*}} "" -// CHECK-NEXT: PackExpansionExpr {{.*}} '' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'Is' 'int' // CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct InvalidAnnotatedPackTemplateStruct // CHECK-NEXT: ClassTemplateSpecialization {{.*}} 'InvalidAnnotatedPackTemplateStruct' // CHECK-NEXT: ClassTemplateSpecializationDecl {{.*}} struct InvalidAnnotatedPackTemplateStruct definition @@ -446,7 +424,7 @@ void UseAnnotatedPackTemplateStructSpecializations() { // CHECK-NEXT: TemplateArgument{{.*}} integral 6 // CHECK-NEXT: TemplateArgument{{.*}} integral 7 // CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct InvalidAnnotatedPackTemplateStruct -template struct [[clang::annotate(Is...)]] InvalidAnnotatedPackTemplateStruct{}; // expected-error {{'annotate' attribute requires a string}} expected-error {{'annotate' attribute takes at least 1 argument}} +template struct InvalidAnnotatedPackTemplateStruct{}; template struct [[clang::annotate("ANNOTATE_BIR", Is...)]] InvalidAnnotatedPackTemplateStruct{}; template struct InvalidAnnotatedPackTemplateStruct {}; template <> struct InvalidAnnotatedPackTemplateStruct {}; @@ -454,8 +432,8 @@ void UseInvalidAnnotatedPackTemplateStruct() { InvalidAnnotatedPackTemplateStruct Instance1{}; InvalidAnnotatedPackTemplateStruct Instance2{}; InvalidAnnotatedPackTemplateStruct Instance3{}; - InvalidAnnotatedPackTemplateStruct Instance4{}; // expected-note {{in 
instantiation of template class 'attribute_annotate::InvalidAnnotatedPackTemplateStruct' requested here}} - InvalidAnnotatedPackTemplateStruct Instance5{}; // expected-note {{in instantiation of template class 'attribute_annotate::InvalidAnnotatedPackTemplateStruct' requested here}} + InvalidAnnotatedPackTemplateStruct Instance4{}; + InvalidAnnotatedPackTemplateStruct Instance5{}; } // CHECK: FunctionTemplateDecl {{.*}} RedeclaredAnnotatedFunc diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 79db17501b64f06442e13872a0478a548db6892c..30790bc45ce35b1346c8d1153340b96bc2b54b8e 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -2297,6 +2297,22 @@ static bool isVariadicExprArgument(const Record *Arg) { .Default(false); } +static bool isStringLiteralArgument(const Record *Arg) { + return !Arg->getSuperClasses().empty() && + llvm::StringSwitch<bool>( + Arg->getSuperClasses().back().first->getName()) + .Case("StringArgument", true) + .Default(false); +} + +static bool isVariadicStringLiteralArgument(const Record *Arg) { + return !Arg->getSuperClasses().empty() && + llvm::StringSwitch<bool>( + Arg->getSuperClasses().back().first->getName()) + .Case("VariadicStringArgument", true) + .Default(false); +} + static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST)\n"; @@ -2317,6 +2333,34 @@ static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, OS << "#endif // CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST\n\n"; } +// Emits the list of arguments that should be parsed as unevaluated string +// literals for each attribute. +static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, + raw_ostream &OS) { + OS << "#if defined(CLANG_ATTR_STRING_LITERAL_ARG_LIST)\n"; + std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr"); + for (const auto *Attr : Attrs) { + std::vector<Record *> Args = Attr->getValueAsListOfDefs("Args"); + uint32_t Bits = 0; + assert(Args.size() <= 32 && "unsupported number of arguments in attribute"); + for (uint32_t N = 0; N < Args.size(); ++N) { + Bits |= (isStringLiteralArgument(Args[N]) << N); + // If we have a variadic string argument, set all the remaining bits to 1 + if (isVariadicStringLiteralArgument(Args[N])) { + Bits |= maskTrailingZeros<uint32_t>(N); + break; + } + } + if (!Bits) + continue; + // All these spellings have at least one string literal as argument. + forEachUniqueSpelling(*Attr, [&](const FlattenedSpelling &S) { + OS << ".Case(\"" << S.name() << "\", " << Bits << ")\n"; + }); + } + OS << "#endif // CLANG_ATTR_STRING_LITERAL_ARG_LIST\n\n"; +} + // Emits the first-argument-is-identifier property for attributes. static void emitClangAttrIdentifierArgList(RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_IDENTIFIER_ARG_LIST)\n"; @@ -3415,23 +3459,27 @@ static void GenerateHasAttrSpellingStringSwitch( OS << " .Default(0);\n"; } -// Emits the list of tokens for regular keyword attributes. -void EmitClangAttrTokenKinds(RecordKeeper &Records, raw_ostream &OS) { - emitSourceFileHeader("A list of tokens generated from the attribute" - " definitions", - OS); +// Emits the list of regular keyword attributes with info about their arguments.
+void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records, + raw_ostream &OS) { + emitSourceFileHeader( + "A list of regular keyword attributes generated from the attribute" + " definitions", + OS); // Assume for now that the same token is not used in multiple regular // keyword attributes. for (auto *R : Records.getAllDerivedDefinitions("Attr")) - for (const auto &S : GetFlattenedSpellings(*R)) - if (isRegularKeywordAttribute(S)) { - if (!R->getValueAsListOfDefs("Args").empty()) - PrintError(R->getLoc(), - "RegularKeyword attributes with arguments are not " - "yet supported"); - OS << "KEYWORD_ATTRIBUTE(" - << S.getSpellingRecord().getValueAsString("Name") << ", )\n"; - } + for (const auto &S : GetFlattenedSpellings(*R)) { + if (!isRegularKeywordAttribute(S)) + continue; + std::vector Args = R->getValueAsListOfDefs("Args"); + bool HasArgs = llvm::any_of( + Args, [](const Record *Arg) { return !Arg->getValueAsBit("Fake"); }); + + OS << "KEYWORD_ATTRIBUTE(" + << S.getSpellingRecord().getValueAsString("Name") << ", " + << (HasArgs ? "true" : "false") << ",)\n"; + } OS << "#undef KEYWORD_ATTRIBUTE\n"; } @@ -4616,6 +4664,7 @@ void EmitClangAttrParserStringSwitches(RecordKeeper &Records, emitSourceFileHeader("Parser-related llvm::StringSwitch cases", OS); emitClangAttrArgContextList(Records, OS); emitClangAttrIdentifierArgList(Records, OS); + emitClangAttrUnevaluatedStringLiteralList(Records, OS); emitClangAttrVariadicIdentifierArgList(Records, OS); emitClangAttrThisIsaIdentifierArgList(Records, OS); emitClangAttrAcceptsExprPack(Records, OS); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 936724b9ce38f4e3053adb2fde74fbdf1a210b76..b19048eff5db206cc78882d24219eab200e1233e 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -550,6 +550,8 @@ class NeonEmitter { void createIntrinsic(Record *R, SmallVectorImpl &Out); void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl &Defs); + void genStreamingSVECompatibleList(raw_ostream &OS, + SmallVectorImpl &Defs); void genOverloadTypeCheckCode(raw_ostream &OS, SmallVectorImpl &Defs); void genIntrinsicRangeCheckCode(raw_ostream &OS, @@ -2039,6 +2041,30 @@ void NeonEmitter::genBuiltinsDef(raw_ostream &OS, OS << "#endif\n\n"; } +void NeonEmitter::genStreamingSVECompatibleList( + raw_ostream &OS, SmallVectorImpl &Defs) { + OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n"; + + std::set Emitted; + for (auto *Def : Defs) { + // If the def has a body (that is, it has Operation DAGs), it won't call + // __builtin_neon_* so we don't need to generate a definition for it. + if (Def->hasBody()) + continue; + + std::string Name = Def->getMangledName(); + if (Emitted.find(Name) != Emitted.end()) + continue; + + // FIXME: We should make exceptions here for some NEON builtins that are + // permitted in streaming mode. + OS << "case NEON::BI__builtin_neon_" << Name + << ": BuiltinType = ArmNonStreaming; break;\n"; + Emitted.insert(Name); + } + OS << "#endif\n\n"; +} + /// Generate the ARM and AArch64 overloaded type checking code for /// SemaChecking.cpp, checking for unique builtin declarations. void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, @@ -2222,6 +2248,8 @@ void NeonEmitter::runHeader(raw_ostream &OS) { // Generate ARM overloaded type checking code for SemaChecking.cpp genOverloadTypeCheckCode(OS, Defs); + genStreamingSVECompatibleList(OS, Defs); + // Generate ARM range checking code for shift/lane immediates. 
genIntrinsicRangeCheckCode(OS, Defs); } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index dbf5122fdf22317cbc5258f41a44ad46c02ce4c1..2b61f8b81587df4a4c398186ca7bb343cd2a1512 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -43,6 +43,8 @@ enum ClassKind { ClassG, // Overloaded name without type suffix }; +enum class ACLEKind { SVE, SME }; + using TypeSpec = std::string; namespace { @@ -236,7 +238,7 @@ public: } /// Emits the intrinsic declaration to the ostream. - void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const; + void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter, ACLEKind Kind) const; private: std::string getMergeSuffix() const { return MergeSuffix; } @@ -344,6 +346,10 @@ public: /// Emit arm_sve.h. void createHeader(raw_ostream &o); + // Emits core intrinsics in both arm_sme.h and arm_sve.h + void createCoreHeaderIntrinsics(raw_ostream &o, SVEEmitter &Emitter, + ACLEKind Kind); + /// Emit all the __builtin prototypes and code needed by Sema. void createBuiltins(raw_ostream &o); @@ -365,9 +371,15 @@ public: /// Emit all the information needed to map builtin -> LLVM IR intrinsic. void createSMECodeGenMap(raw_ostream &o); + /// Create a table for a builtin's requirement for PSTATE.SM. + void createStreamingAttrs(raw_ostream &o, ACLEKind Kind); + /// Emit all the range checks for the immediates. void createSMERangeChecks(raw_ostream &o); + /// Create a table for a builtin's requirement for PSTATE.ZA. + void createBuiltinZAState(raw_ostream &OS); + /// Create intrinsic and add it to \p Out void createIntrinsic(Record *R, SmallVectorImpl> &Out); @@ -987,28 +999,24 @@ std::string Intrinsic::mangleName(ClassKind LocalCK) const { getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter, + ACLEKind Kind) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); - std::string SMEAttrs = ""; + OS << (IsOverloaded ? "__aio " : "__ai ") + << "__attribute__((__clang_arm_builtin_alias("; - if (Flags & Emitter.getEnumValueForFlag("IsStreaming")) - SMEAttrs += ", arm_streaming"; - if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible")) - SMEAttrs += ", arm_streaming_compatible"; - if (Flags & Emitter.getEnumValueForFlag("IsSharedZA")) - SMEAttrs += ", arm_shared_za"; - if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA")) - SMEAttrs += ", arm_preserves_za"; + switch (Kind) { + case ACLEKind::SME: + OS << "__builtin_sme_" << FullName << ")"; + break; + case ACLEKind::SVE: + OS << "__builtin_sve_" << FullName << ")"; + break; + } - OS << (IsOverloaded ? "__aio " : "__ai ") - << "__attribute__((__clang_arm_builtin_alias(" - << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_") - << FullName << ")"; - if (!SMEAttrs.empty()) - OS << SMEAttrs; OS << "))\n"; OS << getTypes()[0].str() << " " << ProtoName << "("; @@ -1143,6 +1151,34 @@ void SVEEmitter::createIntrinsic( } } +void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, + SVEEmitter &Emitter, + ACLEKind Kind) { + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + // Sort intrinsics in header file by following order/priority: + // - Architectural guard (i.e. 
does it require SVE2 or SVE2_AES) + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort(Defs.begin(), Defs.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), + (unsigned)I->getClassKind(), + I->getName()); + }; + return ToTuple(A) < ToTuple(B); + }); + + // Actually emit the intrinsic declarations. + for (auto &I : Defs) + I->emitIntrinsic(OS, Emitter, Kind); +} + void SVEEmitter::createHeader(raw_ostream &OS) { OS << "/*===---- arm_sve.h - ARM SVE intrinsics " "-----------------------------------===\n" @@ -1284,7 +1320,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) { if (ShortForm) { OS << "__aio __attribute__((target(\"sve\"))) " << From.Type << " svreinterpret_" << From.Suffix; - OS << "(" << To.Type << " op) {\n"; + OS << "(" << To.Type << " op) __arm_streaming_compatible {\n"; OS << " return __builtin_sve_reinterpret_" << From.Suffix << "_" << To.Suffix << "(op);\n"; OS << "}\n\n"; @@ -1294,27 +1330,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) { << To.Suffix << "(__VA_ARGS__)\n"; } - SmallVector, 128> Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) - createIntrinsic(R, Defs); - - // Sort intrinsics in header file by following order/priority: - // - Architectural guard (i.e. does it require SVE2 or SVE2_AES) - // - Class (is intrinsic overloaded or not) - // - Intrinsic name - std::stable_sort( - Defs.begin(), Defs.end(), [](const std::unique_ptr &A, - const std::unique_ptr &B) { - auto ToTuple = [](const std::unique_ptr &I) { - return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); - }; - return ToTuple(A) < ToTuple(B); - }); - - // Actually emit the intrinsic declarations. 
- for (auto &I : Defs) - I->emitIntrinsic(OS, *this); + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SVE); OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n"; OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n"; @@ -1464,7 +1480,7 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) { } void SVEEmitter::createSMEHeader(raw_ostream &OS) { - OS << "/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics " + OS << "/*===---- arm_sme.h - ARM SME intrinsics " "------===\n" " *\n" " *\n" @@ -1481,7 +1497,7 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "#define __ARM_SME_H\n\n"; OS << "#if !defined(__LITTLE_ENDIAN__)\n"; - OS << "#error \"Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h\"\n"; + OS << "#error \"Big endian is currently not supported for arm_sme.h\"\n"; OS << "#endif\n"; OS << "#include \n\n"; @@ -1496,30 +1512,26 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "extern \"C\" {\n"; OS << "#endif\n\n"; - SmallVector, 128> Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) - createIntrinsic(R, Defs); + OS << "void __arm_za_disable(void) __arm_streaming_compatible;\n\n"; - // Sort intrinsics in header file by following order/priority similar to SVE: - // - Architectural guard - // - Class (is intrinsic overloaded or not) - // - Intrinsic name - std::stable_sort(Defs.begin(), Defs.end(), - [](const std::unique_ptr &A, - const std::unique_ptr &B) { - auto ToTuple = [](const std::unique_ptr &I) { - return std::make_tuple(I->getGuard(), - (unsigned)I->getClassKind(), - I->getName()); - }; - return ToTuple(A) < ToTuple(B); - }); + OS << "__ai bool __arm_has_sme(void) __arm_streaming_compatible {\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & (1ULL << 63);\n"; + OS << "}\n\n"; - // Actually emit the intrinsic declaration. 
- for (auto &I : Defs) { - I->emitIntrinsic(OS, *this); - } + OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible " + "{\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & 1;\n"; + OS << "}\n\n"; + + OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) " + "__arm_streaming_compatible __arm_out(\"za\") " + "{ }\n\n"; + + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME); OS << "#ifdef __cplusplus\n"; OS << "} // extern \"C\"\n"; @@ -1624,6 +1636,76 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { OS << "#endif\n\n"; } +void SVEEmitter::createBuiltinZAState(raw_ostream &OS) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); + + std::map> IntrinsicsPerState; + for (auto &Def : Defs) { + if (Def->isFlagSet(getEnumValueForFlag("IsInZA"))) + IntrinsicsPerState["ArmInZA"].insert(Def->getMangledName()); + else if (Def->isFlagSet(getEnumValueForFlag("IsOutZA"))) + IntrinsicsPerState["ArmOutZA"].insert(Def->getMangledName()); + else if (Def->isFlagSet(getEnumValueForFlag("IsInOutZA"))) + IntrinsicsPerState["ArmInOutZA"].insert(Def->getMangledName()); + } + + OS << "#ifdef GET_SME_BUILTIN_GET_STATE\n"; + for (auto &KV : IntrinsicsPerState) { + for (StringRef Name : KV.second) + OS << "case SME::BI__builtin_sme_" << Name << ":\n"; + OS << " return " << KV.first << ";\n"; + } + OS << "#endif\n\n"; +} + +void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); + + StringRef ExtensionKind; + switch (Kind) { + case ACLEKind::SME: + ExtensionKind = "SME"; + break; + case ACLEKind::SVE: + ExtensionKind = "SVE"; + break; + } + + OS << "#ifdef GET_" << ExtensionKind << "_STREAMING_ATTRS\n"; + + llvm::StringMap> StreamingMap; + + uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming"); + uint64_t IsStreamingCompatibleFlag = + getEnumValueForFlag("IsStreamingCompatible"); + for (auto &Def : Defs) { + if (Def->isFlagSet(IsStreamingFlag)) + StreamingMap["ArmStreaming"].insert(Def->getMangledName()); + else if (Def->isFlagSet(IsStreamingCompatibleFlag)) + StreamingMap["ArmStreamingCompatible"].insert(Def->getMangledName()); + else + StreamingMap["ArmNonStreaming"].insert(Def->getMangledName()); + } + + for (auto BuiltinType : StreamingMap.keys()) { + for (auto Name : StreamingMap[BuiltinType]) { + OS << "case " << ExtensionKind << "::BI__builtin_" + << ExtensionKind.lower() << "_"; + OS << Name << ":\n"; + } + OS << " BuiltinType = " << BuiltinType << ";\n"; + OS << " break;\n"; + } + + OS << "#endif\n\n"; +} + namespace clang { void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createHeader(OS); @@ -1645,6 +1727,10 @@ void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createTypeFlags(OS); } +void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE); +} + void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMEHeader(OS); } @@ -1660,4 +1746,12 @@ void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMERangeChecks(OS); } + +void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { + 
SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME); +} + +void EmitSmeBuiltinZAState(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createBuiltinZAState(OS); +} } // End namespace clang diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 38215abd9d9b7b68ba88fc941c7b164c70c33a5a..558c901be53b59ddad453519b507deafec7e321c 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -35,7 +35,7 @@ enum ActionType { GenClangAttrSubjectMatchRuleList, GenClangAttrPCHRead, GenClangAttrPCHWrite, - GenClangAttrTokenKinds, + GenClangRegularKeywordAttributeInfo, GenClangAttrHasAttributeImpl, GenClangAttrSpellingListIndex, GenClangAttrASTVisitor, @@ -83,10 +83,13 @@ enum ActionType { GenArmSveBuiltinCG, GenArmSveTypeFlags, GenArmSveRangeChecks, + GenArmSveStreamingAttrs, GenArmSmeHeader, GenArmSmeBuiltins, GenArmSmeBuiltinCG, GenArmSmeRangeChecks, + GenArmSmeStreamingAttrs, + GenArmSmeBuiltinZAState, GenArmCdeHeader, GenArmCdeBuiltinDef, GenArmCdeBuiltinSema, @@ -136,8 +139,10 @@ cl::opt Action( "Generate clang PCH attribute reader"), clEnumValN(GenClangAttrPCHWrite, "gen-clang-attr-pch-write", "Generate clang PCH attribute writer"), - clEnumValN(GenClangAttrTokenKinds, "gen-clang-attr-token-kinds", - "Generate a list of attribute-related clang tokens"), + clEnumValN(GenClangRegularKeywordAttributeInfo, + "gen-clang-regular-keyword-attr-info", + "Generate a list of regular keyword attributes with info " + "about their arguments"), clEnumValN(GenClangAttrHasAttributeImpl, "gen-clang-attr-has-attribute-impl", "Generate a clang attribute spelling list"), @@ -233,6 +238,8 @@ cl::opt Action( "Generate arm_sve_typeflags.inc for clang"), clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks", "Generate arm_sve_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSveStreamingAttrs, "gen-arm-sve-streaming-attrs", + "Generate arm_sve_streaming_attrs.inc for clang"), clEnumValN(GenArmSmeHeader, "gen-arm-sme-header", "Generate arm_sme.h for clang"), clEnumValN(GenArmSmeBuiltins, "gen-arm-sme-builtins", @@ -241,6 +248,10 @@ cl::opt Action( "Generate arm_sme_builtin_cg_map.inc for clang"), clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks", "Generate arm_sme_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs", + "Generate arm_sme_streaming_attrs.inc for clang"), + clEnumValN(GenArmSmeBuiltinZAState, "gen-arm-sme-builtin-za-state", + "Generate arm_sme_builtins_za_state.inc for clang"), clEnumValN(GenArmMveHeader, "gen-arm-mve-header", "Generate arm_mve.h for clang"), clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def", @@ -269,11 +280,14 @@ cl::opt Action( "Generate riscv_vector_builtin_cg.inc for clang"), clEnumValN(GenRISCVVectorBuiltinSema, "gen-riscv-vector-builtin-sema", "Generate riscv_vector_builtin_sema.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltins, "gen-riscv-sifive-vector-builtins", + clEnumValN(GenRISCVSiFiveVectorBuiltins, + "gen-riscv-sifive-vector-builtins", "Generate riscv_sifive_vector_builtins.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltinCG, "gen-riscv-sifive-vector-builtin-codegen", + clEnumValN(GenRISCVSiFiveVectorBuiltinCG, + "gen-riscv-sifive-vector-builtin-codegen", "Generate riscv_sifive_vector_builtin_cg.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltinSema, "gen-riscv-sifive-vector-builtin-sema", + clEnumValN(GenRISCVSiFiveVectorBuiltinSema, + "gen-riscv-sifive-vector-builtin-sema", "Generate 
riscv_sifive_vector_builtin_sema.inc for clang"), clEnumValN(GenAttrDocs, "gen-attr-docs", "Generate attribute documentation"), @@ -327,8 +341,8 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenClangAttrPCHWrite: EmitClangAttrPCHWrite(Records, OS); break; - case GenClangAttrTokenKinds: - EmitClangAttrTokenKinds(Records, OS); + case GenClangRegularKeywordAttributeInfo: + EmitClangRegularKeywordAttributeInfo(Records, OS); break; case GenClangAttrHasAttributeImpl: EmitClangAttrHasAttrImpl(Records, OS); @@ -472,6 +486,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSveRangeChecks: EmitSveRangeChecks(Records, OS); break; + case GenArmSveStreamingAttrs: + EmitSveStreamingAttrs(Records, OS); + break; case GenArmSmeHeader: EmitSmeHeader(Records, OS); break; @@ -484,6 +501,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSmeRangeChecks: EmitSmeRangeChecks(Records, OS); break; + case GenArmSmeStreamingAttrs: + EmitSmeStreamingAttrs(Records, OS); + break; + case GenArmSmeBuiltinZAState: + EmitSmeBuiltinZAState(Records, OS); + break; case GenArmCdeHeader: EmitCdeHeader(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 8265a531a98fb2a1e0957634ce01c8a826b05ad3..8b8e2668efa3757b26e9ed9f7fd88782df6378fe 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -43,8 +43,8 @@ void EmitClangAttrSubjectMatchRuleList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrPCHRead(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrPCHWrite(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangAttrTokenKinds(llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); +void EmitClangRegularKeywordAttributeInfo(llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrHasAttrImpl(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrSpellingListIndex(llvm::RecordKeeper &Records, @@ -102,11 +102,14 @@ void EmitSveBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltinZAState(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 23dc53b5a3cbc3e1520b1dfac9a09c3c003600a5..08ac5b9a5db2ea99feef3e76a64a07dabd5203e6 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -115,12 +115,7 @@ C++23, informally referred to as C++26.

Unevaluated strings P2361R6 - -
- Clang 17 (Partial) - Attributes arguments don't yet parse as unevaluated string literals. -
- + Clang 18 Add @, $, and ` to the basic character set diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index a72e279dd75e8fe76e65c9031d2425388d3b3273..5ed49f0f558814437ce95712047b64ea0e6980da 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -312,6 +312,10 @@ function(add_compiler_rt_runtime name type) set(COMPONENT_OPTION COMPONENT ${libname}) endif() + if(type STREQUAL "SHARED") + list(APPEND LIB_DEFS COMPILER_RT_SHARED_LIB) + endif() + if(type STREQUAL "OBJECT") if(CMAKE_C_COMPILER_ID MATCHES Clang AND CMAKE_C_COMPILER_TARGET) list(APPEND extra_cflags_${libname} "--target=${CMAKE_C_COMPILER_TARGET}") diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 9cf4877baf48953aba0dca47dbb3150a5ed02b8b..e91e3923a756c5352996f7055cceeb6cd2cacbcd 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -33,6 +33,12 @@ asm(\".arch armv8-a+lse\"); asm(\"cas w0, w1, [x2]\"); ") +builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME +" +asm(\".arch armv9-a+sme\"); +asm(\"smstart\"); +") + if(ANDROID) set(OS_NAME "Android") else() diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index d62fa0432e2a5a52774a043e0926c86dfa0850b2..cf376b4a021c319fe46db2c2b252b739cd32c99d 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -555,6 +555,13 @@ set(aarch64_SOURCES aarch64/fp_mode.c ) +if(COMPILER_RT_HAS_ASM_SME) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c) + message(STATUS "AArch64 SME ABI routines enabled") +else() + message(STATUS "AArch64 SME ABI routines disabled") +endif() + # Generate outline atomics helpers from lse.S base set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c new file mode 100644 index 0000000000000000000000000000000000000000..b6ee12170d56dbd4890445ef74d6e76249b66bdb --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c @@ -0,0 +1,52 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +__attribute__((visibility("hidden"), nocommon)) +_Bool __aarch64_has_sme_and_tpidr2_el0; + +// We have multiple ways to check that the function has SME, depending on our +// target. +// * For Linux we can use __getauxval(). +// * For newlib we can use __aarch64_sme_accessible(). 
+ +#if defined(__linux__) + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef HWCAP2_SME +#define HWCAP2_SME (1 << 23) +#endif + +extern unsigned long int __getauxval (unsigned long int); + +static _Bool has_sme(void) { + return __getauxval(AT_HWCAP2) & HWCAP2_SME; +} + +#else // defined(__linux__) + +#if defined(COMPILER_RT_SHARED_LIB) +__attribute__((weak)) +#endif +extern _Bool __aarch64_sme_accessible(void); + +static _Bool has_sme(void) { +#if defined(COMPILER_RT_SHARED_LIB) + if (!__aarch64_sme_accessible) + return 0; +#endif + return __aarch64_sme_accessible(); +} + +#endif // defined(__linux__) + +#if __GNUC__ >= 9 +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +__attribute__((constructor(90))) +static void init_aarch64_has_sme(void) { + __aarch64_has_sme_and_tpidr2_el0 = has_sme(); +} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S new file mode 100644 index 0000000000000000000000000000000000000000..4c0ff66931db7b2ccd6fcb5c2131ff8250b3e85e --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -0,0 +1,186 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// This patch implements the support routines for the SME ABI, +// described here: +// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines + +#include "../assembly.h" + + +#if !defined(__APPLE__) +#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) +#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) +#else +// MachO requires @page/@pageoff directives because the global is defined +// in a different file. Otherwise this file may fail to build. +#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page +#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff +#endif + +.arch armv9-a+sme + +// Utility function which calls a system's abort() routine. Because the function +// is streaming-compatible it should disable streaming-SVE mode before calling +// abort(). Note that there is no need to preserve any state before the call, +// because the function does not return. +DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) + .cfi_startproc + .variant_pcs SYMBOL_NAME(do_abort) + BTI_C + stp x29, x30, [sp, #-32]! + cntd x0 + // Store VG to a stack location that we describe with .cfi_offset + str x0, [sp, #16] + .cfi_def_cfa_offset 32 + .cfi_offset w30, -24 + .cfi_offset w29, -32 + .cfi_offset 46, -16 + bl __arm_sme_state + tbz x0, #0, 2f +1: + smstop sm +2: + // We can't make this into a tail-call because the unwinder would + // need to restore the value of VG. + bl SYMBOL_NAME(abort) + .cfi_endproc +END_COMPILERRT_FUNCTION(do_abort) + +// __arm_sme_state fills the result registers based on a local +// that is set as part of the compiler-rt startup code. 
+// __aarch64_has_sme_and_tpidr2_el0 +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) + .variant_pcs __arm_sme_state + BTI_C + mov x0, xzr + mov x1, xzr + + adrp x16, TPIDR2_SYMBOL + ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] + cbz w16, 1f +0: + orr x0, x0, #0xC000000000000000 + mrs x16, SVCR + bfxil x0, x16, #0, #2 + mrs x1, TPIDR2_EL0 +1: + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state) + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) + .variant_pcs __arm_tpidr2_restore + BTI_C + // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific + // manner. + mrs x14, TPIDR2_EL0 + cbnz x14, 2f + + // If any of the reserved bytes in the first 16 bytes of BLK are nonzero, + // the subroutine [..] aborts in some platform-defined manner. + ldrh w14, [x0, #10] + cbnz w14, 2f + ldr w14, [x0, #12] + cbnz w14, 2f + + // If BLK.za_save_buffer is NULL, the subroutine does nothing. + ldr x16, [x0] + cbz x16, 1f + + // If BLK.num_za_save_slices is zero, the subroutine does nothing. + ldrh w14, [x0, #8] + cbz x14, 1f + + mov x15, xzr +0: + ldr za[w15,0], [x16] + addsvl x16, x16, #1 + add x15, x15, #1 + cmp x14, x15 + b.ne 0b +1: + ret +2: + b SYMBOL_NAME(do_abort) +END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) + .variant_pcs __arm_tpidr2_save + BTI_C + // If the current thread does not have access to TPIDR2_EL0, the subroutine + // does nothing. + adrp x14, TPIDR2_SYMBOL + ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] + cbz w14, 1f + + // If TPIDR2_EL0 is null, the subroutine does nothing. + mrs x16, TPIDR2_EL0 + cbz x16, 1f + + // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are + // nonzero, the subroutine [..] aborts in some platform-defined manner. + ldrh w14, [x16, #10] + cbnz w14, 2f + ldr w14, [x16, #12] + cbnz w14, 2f + + // If num_za_save_slices is zero, the subroutine does nothing. + ldrh w14, [x16, #8] + cbz x14, 1f + + // If za_save_buffer is NULL, the subroutine does nothing. + ldr x16, [x16] + cbz x16, 1f + + mov x15, xzr +0: + str za[w15,0], [x16] + addsvl x16, x16, #1 + add x15, x15, #1 + cmp x14, x15 + b.ne 0b +1: + ret +2: + b SYMBOL_NAME(do_abort) +END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) + .variant_pcs __arm_za_disable + BTI_C + // If the current thread does not have access to SME, the subroutine does + // nothing. + adrp x14, TPIDR2_SYMBOL + ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] + cbz w14, 0f + + // Otherwise, the subroutine behaves as if it did the following: + // * Call __arm_tpidr2_save. + stp x29, x30, [sp, #-16]! + .cfi_def_cfa_offset 16 + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + bl __arm_tpidr2_save + + // * Set TPIDR2_EL0 to null. + msr TPIDR2_EL0, xzr + + // * Set PSTATE.ZA to 0. 
+ smstop za + + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 +0: + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable) + +NO_EXEC_STACK_DIRECTIVE + +// GNU property note for BTI and PAC +GNU_PROPERTY_BTI_PAC diff --git a/llvm/docs/AArch64SME.rst b/llvm/docs/AArch64SME.rst index 63573bf91eacb56686a63c363fa127c508120ce5..b5a01cb204b812bfa8aaa813f540910a8580a9db 100644 --- a/llvm/docs/AArch64SME.rst +++ b/llvm/docs/AArch64SME.rst @@ -22,26 +22,32 @@ Below we describe the LLVM IR attributes and their relation to the C/C++ level ACLE attributes: ``aarch64_pstate_sm_enabled`` - is used for functions with ``__attribute__((arm_streaming))`` + is used for functions with ``__arm_streaming`` ``aarch64_pstate_sm_compatible`` - is used for functions with ``__attribute__((arm_streaming_compatible))`` + is used for functions with ``__arm_streaming_compatible`` ``aarch64_pstate_sm_body`` - is used for functions with ``__attribute__((arm_locally_streaming))`` and is + is used for functions with ``__arm_locally_streaming`` and is only valid on function definitions (not declarations) -``aarch64_pstate_za_new`` - is used for functions with ``__attribute__((arm_new_za))`` +``aarch64_new_za`` + is used for functions with ``__arm_new("za")`` -``aarch64_pstate_za_shared`` - is used for functions with ``__attribute__((arm_shared_za))`` +``aarch64_in_za`` + is used for functions with ``__arm_in("za")`` -``aarch64_pstate_za_preserved`` - is used for functions with ``__attribute__((arm_preserves_za))`` +``aarch64_out_za`` + is used for functions with ``__arm_out("za")`` + +``aarch64_inout_za`` + is used for functions with ``__arm_inout("za")`` + +``aarch64_preserves_za`` + is used for functions with ``__arm_preserves("za")`` ``aarch64_expanded_pstate_za`` - is used for functions with ``__attribute__((arm_new_za))`` + is used for functions with ``__arm_new_za`` Clang must ensure that the above attributes are added both to the function's declaration/definition as well as to their call-sites. This is @@ -89,11 +95,10 @@ Restrictions on attributes * It is not allowed for a function to be decorated with both ``aarch64_pstate_sm_compatible`` and ``aarch64_pstate_sm_enabled``. -* It is not allowed for a function to be decorated with both - ``aarch64_pstate_za_new`` and ``aarch64_pstate_za_preserved``. - -* It is not allowed for a function to be decorated with both - ``aarch64_pstate_za_new`` and ``aarch64_pstate_za_shared``. +* It is not allowed for a function to be decorated with more than one of the + following attributes: + ``aarch64_new_za``, ``aarch64_in_za``, ``aarch64_out_za``, ``aarch64_inout_za``, + ``aarch64_preserves_za``. These restrictions also apply in the higher level SME ACLE, which means we can emit diagnostics in Clang to signal users about incorrect behaviour. @@ -426,7 +431,7 @@ to toggle PSTATE.ZA using intrinsics. This also makes it simpler to setup a lazy-save mechanism for calls to private-ZA functions (i.e. functions that may either directly or indirectly clobber ZA state). -For the purpose of handling functions marked with ``aarch64_pstate_za_new``, +For the purpose of handling functions marked with ``aarch64_new_za``, we have introduced a new LLVM IR pass (SMEABIPass) that is run just before SelectionDAG. Any such functions dealt with by this pass are marked with ``aarch64_expanded_pstate_za``. 
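To make the attribute mapping above concrete, here is a minimal C sketch (illustrative only, not part of the patch; it assumes a Clang build with SME ACLE support, and the function names are hypothetical). The comment on each declaration names the LLVM IR attribute the table above maps it to:

/* Hedged sketch of the ACLE keywords and the IR attributes they lower to. */
void f_streaming(void) __arm_streaming;             /* "aarch64_pstate_sm_enabled" */
void f_compat(void) __arm_streaming_compatible;     /* "aarch64_pstate_sm_compatible" */
__arm_locally_streaming void f_local(void) {}       /* "aarch64_pstate_sm_body" (definitions only) */
__arm_new("za") void f_new_za(void) {}              /* "aarch64_new_za" */
void f_in_za(void) __arm_in("za");                  /* "aarch64_in_za" */
void f_out_za(void) __arm_out("za");                /* "aarch64_out_za" */
void f_inout_za(void) __arm_inout("za");            /* "aarch64_inout_za" */
void f_preserves_za(void) __arm_preserves("za");    /* "aarch64_preserves_za" */

Per the restrictions listed above, at most one of the "za" state keywords may appear on a given function, which is exactly what the Verifier change below enforces at the IR level.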
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 701cd6e5dbb6f7ba26fd643b3c702a57ca0fd92c..2f4d01c4cddd67bfe1e8d80387eca5ccd3d53473 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4991,10 +4991,13 @@ AArch64: offsets). (However, LLVM currently does this for the ``m`` constraint as well.) - ``r``: A 32 or 64-bit integer register (W* or X*). +- ``Uci``: Like r, but restricted to registers 8 to 11 inclusive. +- ``Ucj``: Like r, but restricted to registers 12 to 15 inclusive. - ``w``: A 32, 64, or 128-bit floating-point, SIMD or SVE vector register. - ``x``: Like w, but restricted to registers 0 to 15 inclusive. - ``y``: Like w, but restricted to SVE vector registers Z0 to Z7 inclusive. -- ``Upl``: One of the low eight SVE predicate registers (P0 to P7) +- ``Uph``: One of the upper eight SVE predicate registers (P8 to P15) +- ``Upl``: One of the lower eight SVE predicate registers (P0 to P7) - ``Upa``: Any of the SVE predicate registers (P0 to P15) AMDGPU: diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 557063c8813268e4fba13df2e30b10634e206afa..21d4b9062206acf108e3bd995310b2c345936e94 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2679,10 +2679,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic; // Spill + fill - def int_aarch64_sme_ldr : DefaultAttrsIntrinsic< - [], [llvm_i32_ty, llvm_ptr_ty]>; - def int_aarch64_sme_str : DefaultAttrsIntrinsic< - [], [llvm_i32_ty, llvm_ptr_ty]>; + class SME_LDR_STR_ZA_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty]>; + def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic; + def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic; class SME_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -3438,4 +3438,9 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic; def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic; + class SME_LDR_STR_ZT_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>; + def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; + def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; + } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 1408ce293ca6544716a5e3cb4aaa03df2226d555..438b4f49c75a226fae4f6f83839a5856779d9ce1 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2116,17 +2116,13 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, V); } - if (Attrs.hasFnAttr("aarch64_pstate_za_new")) { - Check(!Attrs.hasFnAttr("aarch64_pstate_za_preserved"), - "Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_preserved' " - "are incompatible!", - V); - - Check(!Attrs.hasFnAttr("aarch64_pstate_za_shared"), - "Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_shared' " - "are incompatible!", - V); - } + Check((Attrs.hasFnAttr("aarch64_new_za") + Attrs.hasFnAttr("aarch64_in_za") + + Attrs.hasFnAttr("aarch64_inout_za") + + Attrs.hasFnAttr("aarch64_out_za") + + Attrs.hasFnAttr("aarch64_preserves_za")) <= 1, + "Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', " + "'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive", + V); if (Attrs.hasFnAttr(Attribute::JumpTable)) { const GlobalValue *GV = cast(V); diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 
76f1cc782b248888d598a5ba3efc3a255f505745..b090efe3bb3411aa63cccc9e4f5ae00dea6e5896 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -970,6 +970,8 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, RegClass = &AArch64::ZPRRegClass; } else if (AArch64::PPRRegClass.contains(Reg)) { RegClass = &AArch64::PPRRegClass; + } else if (AArch64::PNRRegClass.contains(Reg)) { + RegClass = &AArch64::PNRRegClass; } else { RegClass = &AArch64::FPR128RegClass; AltName = AArch64::vreg; diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index dcb73ae2dce2b59a1010e8bdabb123b3e6851e52..fe392ddcf057da85d86376bd9a90902b123334ff 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1000,10 +1000,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, // expected value for the callee (0 for a normal callee and 1 for a streaming // callee). auto PStateSM = MI.getOperand(2).getReg(); + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32); bool IsStreamingCallee = MI.getOperand(3).getImm(); - unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX; + unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW; MachineInstrBuilder Tbx = - BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0); + BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0); // Split MBB and create two new blocks: // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. @@ -1481,17 +1483,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 
return true; } - case AArch64::OBSCURE_COPY: { - if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) - .add(MI.getOperand(0)) - .addReg(AArch64::XZR) - .add(MI.getOperand(1)) - .addImm(0); - } + case AArch64::COALESCER_BARRIER_FPR16: + case AArch64::COALESCER_BARRIER_FPR32: + case AArch64::COALESCER_BARRIER_FPR64: + case AArch64::COALESCER_BARRIER_FPR128: MI.eraseFromParent(); return true; - } } return false; } diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 1ae3709e9588abbf3c4541b777902a596bdbd4b3..b2c46939e58469ac1508c1fabf92e072fe8bf3b5 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { SMEAttrs CallerAttrs(*FuncInfo.Fn); - if (CallerAttrs.hasZAState() || - (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody())) + if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() || + CallerAttrs.hasStreamingCompatibleInterface()) return nullptr; return new AArch64FastISel(FuncInfo, LibInfo); } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 4d5676f341017270c556407795a2b5b209b0692e..e8071bd7171e0bcea937df69c2317e498d14a8f5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -427,6 +427,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the // funclets. @@ -2977,11 +2978,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return MIB->getIterator(); }; - // SVE objects are always restored in reverse order. 
- for (const RegPairInfo &RPI : reverse(RegPairs)) - if (RPI.isScalable()) - EmitMI(RPI); - if (homogeneousPrologEpilog(MF, &MBB)) { auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) .setMIFlag(MachineInstr::FrameDestroy); @@ -2992,11 +2988,19 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } + // For performance reasons, restore SVE registers in increasing order + auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; + auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); + auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR); + std::reverse(PPRBegin, PPREnd); + auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; }; + auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); + auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR); + std::reverse(ZPRBegin, ZPREnd); + if (ReverseCSRRestoreSeq) { MachineBasicBlock::iterator First = MBB.end(); for (const RegPairInfo &RPI : reverse(RegPairs)) { MachineBasicBlock::iterator It = EmitMI(RPI); if (First == MBB.end()) First = It; @@ -3005,8 +3009,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MBB.splice(MBBI, &MBB, First); } else { for (const RegPairInfo &RPI : RegPairs) { (void)EmitMI(RPI); } } @@ -3252,6 +3254,12 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( bool AArch64FrameLowering::enableStackSlotScavenging( const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + // If the function has streaming-mode changes, don't scavenge a + // spillslot in the callee-save area, as that might require an + // 'addvl' in the streaming-mode-changing call-sequence when the + // function doesn't use a FP. 
+ if (AFI->hasStreamingModeChanges() && !hasFP(MF)) + return false; return AFI->hasCalleeSaveStackFreeSpace(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0cc5e7fc5cc3398e1de637aa298e699adbf1d790..b7c857b72a54552c9394c9194490743c29dcc0a0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCUtil.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -2276,7 +2277,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((AArch64ISD::NodeType)Opcode) { case AArch64ISD::FIRST_NUMBER: break; - MAKE_CASE(AArch64ISD::OBSCURE_COPY) + MAKE_CASE(AArch64ISD::COALESCER_BARRIER) MAKE_CASE(AArch64ISD::SMSTART) MAKE_CASE(AArch64ISD::SMSTOP) MAKE_CASE(AArch64ISD::RESTORE_ZA) @@ -2347,6 +2348,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FCMP) MAKE_CASE(AArch64ISD::STRICT_FCMP) MAKE_CASE(AArch64ISD::STRICT_FCMPE) + MAKE_CASE(AArch64ISD::SME_ZA_LDR) + MAKE_CASE(AArch64ISD::SME_ZA_STR) MAKE_CASE(AArch64ISD::DUP) MAKE_CASE(AArch64ISD::DUPLANE8) MAKE_CASE(AArch64ISD::DUPLANE16) @@ -4759,17 +4762,9 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask); } -SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, - SMEAttrs Attrs, SDLoc DL, - EVT VT) const { - if (Attrs.hasStreamingInterfaceOrBody()) - return DAG.getConstant(1, DL, VT); - - if (Attrs.hasNonStreamingInterfaceAndBody()) - return DAG.getConstant(0, DL, VT); - - assert(Attrs.hasStreamingCompatibleInterface() && "Unexpected interface"); - +SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG, + SDValue Chain, SDLoc DL, + EVT VT) const { SDValue Callee = DAG.getExternalSymbol("__arm_sme_state", getPointerTy(DAG.getDataLayout())); Type *Int64Ty = Type::getInt64Ty(*DAG.getContext()); @@ -4785,15 +4780,88 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } -static std::optional getCalleeAttrsFromExternalFunction(SDValue V) { - if (auto *ES = dyn_cast(V)) { - StringRef S(ES->getSymbol()); - if (S == "__arm_sme_state" || S == "__arm_tpidr2_save") - return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Preserved); - if (S == "__arm_tpidr2_restore") - return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared); +// Lower an SME LDR/STR ZA intrinsic +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +// ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +// ldr(%tileslice, %ptr, %vecnum) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * %vecnum +// %tileslice2 = %tileslice + %vecnum +// ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 15 lower than the vecnum and the remainder is folded into the +// instruction. 
This means that successive loads and stores that are offset from +// each other can share the same base and tile slice register updates. +// ldr(%tileslice, %ptr, 22) +// ldr(%tileslice, %ptr, 23) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * 16 +// %tileslice2 = %tileslice + 16 +// ldr [%tileslice2, 6], [%ptr2, 6] +// ldr [%tileslice2, 7], [%ptr2, 7] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand and the immediate can be folded into the instruction, like case 2. +// ldr(%tileslice, %ptr, %vecnum + 7) +// ldr(%tileslice, %ptr, %vecnum + 8) +// -> +// %svl = rdsvl +// %ptr2 = %ptr + %svl * %vecnum +// %tileslice2 = %tileslice + %vecnum +// ldr [%tileslice2, 7], [%ptr2, 7] +// ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. +SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int32_t ConstAddend = 0; + SDValue VarAddend = VecNum; + + // If the vnum is an add of an immediate, we can fold it into the instruction + if (VecNum.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(VecNum.getOperand(1))) { + ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue(); + VarAddend = VecNum.getOperand(0); + } else if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) { + ConstAddend = ImmNode->getSExtValue(); + VarAddend = SDValue(); + } + + int32_t ImmAddend = ConstAddend % 16; + if (int32_t C = (ConstAddend - ImmAddend)) { + SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32); + VarAddend = VarAddend + ? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal}) + : CVal; + } + + if (VarAddend) { + // Get the vector length that will be multiplied by vnum + auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, + DAG.getConstant(1, DL, MVT::i32)); + + // Multiply SVL and vnum then add it to the base + SDValue Mul = DAG.getNode( + ISD::MUL, DL, MVT::i64, + {SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)}); + Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul}); + // Just add vnum to the tileslice + TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend}); } - return std::nullopt; + + return DAG.getNode(IsLoad ?
AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR, + DL, MVT::Other, + {/*Chain=*/N.getOperand(0), TileSlice, Base, + DAG.getTargetConstant(ImmAddend, DL, MVT::i32)}); } SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, @@ -4819,6 +4887,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain, DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr); } + case Intrinsic::aarch64_sme_str: + case Intrinsic::aarch64_sme_ldr: { + return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr); + } case Intrinsic::aarch64_sme_za_enable: return DAG.getNode( AArch64ISD::SMSTART, DL, MVT::Other, @@ -6283,9 +6355,25 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI); + // Set the reserved bytes (10-15) to zero + EVT PtrTy = Ptr.getValueType(); + SDValue ReservedPtr = + DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(10, DL, PtrTy)); + Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i16), ReservedPtr, + MPI); + ReservedPtr = + DAG.getNode(ISD::ADD, DL, PtrTy, Ptr, DAG.getConstant(12, DL, PtrTy)); + Chain = DAG.getStore(Chain, DL, DAG.getConstant(0, DL, MVT::i32), ReservedPtr, + MPI); + return TPIDR2Obj; } +static bool isPassedInFPR(EVT VT) { + return VT.isFixedLengthVector() || + (VT.isFloatingPoint() && !VT.isScalableVector()); +} + SDValue AArch64TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, @@ -6420,6 +6508,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // This will be the new Chain/Root node. ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT, Glue); Glue = ArgValue.getValue(2); + if (isPassedInFPR(ArgValue.getValueType())) { + ArgValue = + DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, + DAG.getVTList(ArgValue.getValueType(), MVT::Glue), + {ArgValue, Glue}); + Glue = ArgValue.getValue(1); + } } else ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); @@ -6579,12 +6674,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // make sure it is Glued to the last CopyFromReg value. if (IsLocallyStreaming) { const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); - Chain = DAG.getNode( - AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), - {DAG.getRoot(), - DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64), - DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()), Glue}); + SDValue PStateSM; + if (Attrs.hasStreamingCompatibleInterface()) { + PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64); + Register Reg = MF.getRegInfo().createVirtualRegister( + getRegClassFor(PStateSM.getValueType().getSimpleVT())); + FuncInfo->setPStateSMReg(Reg); + Chain = DAG.getCopyToReg(Chain, DL, Reg, PStateSM); + } else { + PStateSM = DAG.getConstant(0, DL, MVT::i64); + } + Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue, PStateSM, + /*Entry*/ true); + // Ensure that the SMSTART happens after the CopyWithChain such that its // chain result is used. for (unsigned I=0; I &RVLocs, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, - SDValue ThisVal) const { + SDValue ThisVal, bool RequiresSMChange) const { DenseMap CopiedRegs; // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -6830,6 +6932,10 @@ SDValue AArch64TargetLowering::LowerCallResult( break; } + if (RequiresSMChange && isPassedInFPR(VA.getValVT())) + Val = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, Val.getValueType(), + Val); + InVals.push_back(Val); } @@ -6926,7 +7032,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( SMEAttrs CallerAttrs(MF.getFunction()); auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal); if (CallerAttrs.requiresSMChange(CalleeAttrs) || - CallerAttrs.requiresLazySave(CalleeAttrs)) + CallerAttrs.requiresLazySave(CalleeAttrs) || + CallerAttrs.hasStreamingBody()) return false; // Functions using the C or Fast calling convention that have an SVE signature @@ -7109,9 +7216,55 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) { return ZExtBool; } +void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const { + // Live-in physreg copies that are glued to SMSTART are applied as + // implicit-def's in the InstrEmitter. Here we remove them so that the + // register allocator can pass call arguments in callee-saved registers + // without inserting extra copies to work around these fake clobbers of + // GPRs that are in fact preserved. + if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 || + MI.getOpcode() == AArch64::MSRpstatePseudo) { + for (unsigned I = MI.getNumOperands() - 1; I > 0; --I) + if (MachineOperand &MO = MI.getOperand(I); + MO.isReg() && MO.isImplicit() && MO.isDef() && + (AArch64::GPR32RegClass.contains(MO.getReg()) || + AArch64::GPR64RegClass.contains(MO.getReg()))) + MI.removeOperand(I); + + // The SVE vector length can change when entering/leaving streaming mode. + if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM || + MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) { + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false, + /*IsImplicit=*/true)); + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/true, + /*IsImplicit=*/true)); + } + } + + // Add an implicit use of 'VG' for ADDXri/SUBXri. These instructions have + // nothing to do with VG as such, but they may be used to materialise a + // frame address; if that address involves a frame index to a scalable + // vector, materialising it will likely require an ADDVL instruction, which + // reads VG.
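+  // For example (illustrative, assuming a scalable stack object), the frame
+  // address may be materialised as:
+  //   add   x0, sp, #16   // ADDXri: fixed part of the offset
+  //   addvl x0, x0, #1    // scalable part of the offset; reads VG
+  // The implicit VG use added below models that dependency, so the
+  // materialisation cannot drift across an smstart/smstop, where VG changes.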
+ const MachineFunction &MF = *MI.getMF(); + if (MF.getInfo()->hasStreamingModeChanges() && + (MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::SUBXri)) { + const MachineOperand &MO = MI.getOperand(1); + if (MO.isFI() && MF.getFrameInfo().getStackID(MO.getIndex()) == + TargetStackID::ScalableVector) + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false, + /*IsImplicit=*/true)); + } +} + SDValue AArch64TargetLowering::changeStreamingMode( SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const { + MachineFunction &MF = DAG.getMachineFunction(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasStreamingModeChanges(true); + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()); SDValue MSROp = @@ -7258,39 +7411,68 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction()); if (CLI.CB) CalleeAttrs = SMEAttrs(*CLI.CB); - else if (std::optional Attrs = - getCalleeAttrsFromExternalFunction(CLI.Callee)) - CalleeAttrs = *Attrs; + else if (auto *ES = dyn_cast(CLI.Callee)) + CalleeAttrs = SMEAttrs(ES->getSymbol()); + + auto DescribeCallsite = + [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & { + R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '"; + if (auto *ES = dyn_cast(CLI.Callee)) + R << ore::NV("Callee", ES->getSymbol()); + else if (CLI.CB && CLI.CB->getCalledFunction()) + R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName()); + else + R << "unknown callee"; + R << "'"; + return R; + }; bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs); - - MachineFrameInfo &MFI = MF.getFrameInfo(); if (RequiresLazySave) { - // Set up a lazy save mechanism by storing the runtime live slices - // (worst-case N*N) to the TPIDR2 stack object. - SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, - DAG.getConstant(1, DL, MVT::i32)); - SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N); unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj(); - MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj); SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); - SDValue BufferPtrAddr = + SDValue NumZaSaveSlicesAddr = DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr, DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType())); - Chain = DAG.getTruncStore(Chain, DL, NN, BufferPtrAddr, MPI, MVT::i16); + SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, + DAG.getConstant(1, DL, MVT::i32)); + Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr, + MPI, MVT::i16); Chain = DAG.getNode( ISD::INTRINSIC_VOID, DL, MVT::Other, Chain, DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32), TPIDR2ObjAddr); + OptimizationRemarkEmitter ORE(&MF.getFunction()); + ORE.emit([&]() { + auto R = CLI.CB ? 
OptimizationRemarkAnalysis("sme", "SMELazySaveZA", + CLI.CB) + : OptimizationRemarkAnalysis("sme", "SMELazySaveZA", + &MF.getFunction()); + return DescribeCallsite(R) << " sets up a lazy save for ZA"; + }); } SDValue PStateSM; - std::optional RequiresSMChange = - CallerAttrs.requiresSMChange(CalleeAttrs); - if (RequiresSMChange) - PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64); + bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs); + if (RequiresSMChange) { + if (CallerAttrs.hasStreamingInterfaceOrBody()) + PStateSM = DAG.getConstant(1, DL, MVT::i64); + else if (CallerAttrs.hasNonStreamingInterface()) + PStateSM = DAG.getConstant(0, DL, MVT::i64); + else + PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64); + OptimizationRemarkEmitter ORE(&MF.getFunction()); + ORE.emit([&]() { + auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition", + CLI.CB) + : OptimizationRemarkAnalysis("sme", "SMETransition", + &MF.getFunction()); + DescribeCallsite(R) << " requires a streaming mode transition"; + return R; + }); + } // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass @@ -7386,6 +7568,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext()); Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty); + MachineFrameInfo &MFI = MF.getFrameInfo(); int FI = MFI.CreateStackObject(StoreSize, Alignment, false); if (isScalable) MFI.setStackID(FI, TargetStackID::ScalableVector); @@ -7459,8 +7642,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add an extra level of indirection for streaming mode changes by // using a pseudo copy node that cannot be rematerialised between a // smstart/smstop and the call by the simple register coalescer. - if (RequiresSMChange && isa(Arg)) - Arg = DAG.getNode(AArch64ISD::OBSCURE_COPY, DL, MVT::i64, Arg); + if (RequiresSMChange && isPassedInFPR(Arg.getValueType())) + Arg = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, + Arg.getValueType(), Arg); RegsToPass.emplace_back(VA.getLocReg(), Arg); RegsUsed.insert(VA.getLocReg()); const TargetOptions &Options = DAG.getTarget().Options; @@ -7551,8 +7735,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue InGlue; if (RequiresSMChange) { - SDValue NewChain = changeStreamingMode(DAG, DL, *RequiresSMChange, Chain, - InGlue, PStateSM, true); + SDValue NewChain = + changeStreamingMode(DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, + InGlue, PStateSM, true); Chain = NewChain.getValue(0); InGlue = NewChain.getValue(1); } @@ -7693,17 +7878,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - SDValue Result = LowerCallResult(Chain, InGlue, CallConv, IsVarArg, RVLocs, - DL, DAG, InVals, IsThisReturn, - IsThisReturn ? OutVals[0] : SDValue()); + SDValue Result = LowerCallResult( + Chain, InGlue, CallConv, IsVarArg, RVLocs, DL, DAG, InVals, IsThisReturn, + IsThisReturn ? 
OutVals[0] : SDValue(), RequiresSMChange); if (!Ins.empty()) InGlue = Result.getValue(Result->getNumValues() - 1); if (RequiresSMChange) { assert(PStateSM && "Expected a PStateSM to be set"); - Result = changeStreamingMode(DAG, DL, !*RequiresSMChange, Result, InGlue, - PStateSM, false); + Result = changeStreamingMode(DAG, DL, !CalleeAttrs.hasStreamingInterface(), + Result, InGlue, PStateSM, false); } if (RequiresLazySave) { @@ -7729,12 +7914,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue TPIDR2Block = DAG.getFrameIndex( FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue); - Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other, - {Result, TPIDR2_EL0, - DAG.getRegister(AArch64::X0, MVT::i64), - RestoreRoutine, - RegMask, - Result.getValue(1)}); + Result = + DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other, + {Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64), + RestoreRoutine, RegMask, Result.getValue(1)}); // Finally reset the TPIDR2_EL0 register to 0. Result = DAG.getNode( @@ -7838,16 +8021,26 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Emit SMSTOP before returning from a locally streaming function SMEAttrs FuncAttrs(MF.getFunction()); if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) { - Chain = DAG.getNode( - AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain, - DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32), - DAG.getConstant(1, DL, MVT::i64), DAG.getConstant(0, DL, MVT::i64), - DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask())); + if (FuncAttrs.hasStreamingCompatibleInterface()) { + Register Reg = FuncInfo->getPStateSMReg(); + assert(Reg.isValid() && "PStateSM Register is invalid"); + SDValue PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64); + Chain = + changeStreamingMode(DAG, DL, /*Enable*/ false, Chain, + /*Glue*/ SDValue(), PStateSM, /*Entry*/ false); + } else + Chain = changeStreamingMode( + DAG, DL, /*Enable*/ false, Chain, + /*Glue*/ SDValue(), DAG.getConstant(1, DL, MVT::i64), /*Entry*/ true); Glue = Chain.getValue(1); } SmallVector RetOps(1, Chain); for (auto &RetVal : RetVals) { + if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface() && + isPassedInFPR(RetVal.second.getValueType())) + RetVal.second = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL, + RetVal.second.getValueType(), RetVal.second); Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Glue); Glue = Chain.getValue(1); RetOps.push_back( @@ -9950,19 +10143,60 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const { return "r"; } -enum PredicateConstraint { - Upl, - Upa, - Invalid -}; +enum class PredicateConstraint { Uph, Upl, Upa }; + +static std::optional +parsePredicateConstraint(StringRef Constraint) { + return StringSwitch>(Constraint) + .Case("Uph", PredicateConstraint::Uph) + .Case("Upl", PredicateConstraint::Upl) + .Case("Upa", PredicateConstraint::Upa) + .Default(std::nullopt); +} + +static const TargetRegisterClass * +getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) { + if (VT != MVT::aarch64svcount && + (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)) + return nullptr; -static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { - PredicateConstraint P = PredicateConstraint::Invalid; - if (Constraint == "Upa") - P = PredicateConstraint::Upa; - if (Constraint == "Upl") 
- P = PredicateConstraint::Upl; - return P; + switch (Constraint) { + case PredicateConstraint::Uph: + return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass + : &AArch64::PPR_p8to15RegClass; + case PredicateConstraint::Upl: + return VT == MVT::aarch64svcount ? nullptr : &AArch64::PPR_3bRegClass; + case PredicateConstraint::Upa: + return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass + : &AArch64::PPRRegClass; + } + + llvm_unreachable("Missing PredicateConstraint!"); +} + +enum class ReducedGprConstraint { Uci, Ucj }; + +static std::optional +parseReducedGprConstraint(StringRef Constraint) { + return StringSwitch>(Constraint) + .Case("Uci", ReducedGprConstraint::Uci) + .Case("Ucj", ReducedGprConstraint::Ucj) + .Default(std::nullopt); +} + +static const TargetRegisterClass * +getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT) { + if (!VT.isScalarInteger() || VT.getFixedSizeInBits() > 64) + return nullptr; + + switch (Constraint) { + case ReducedGprConstraint::Uci: + return &AArch64::MatrixIndexGPR32_8_11RegClass; + case ReducedGprConstraint::Ucj: + return &AArch64::MatrixIndexGPR32_12_15RegClass; + } + + llvm_unreachable("Missing ReducedGprConstraint!"); } // The set of cc code supported is from @@ -10060,9 +10294,10 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { case 'S': // A symbolic address return C_Other; } - } else if (parsePredicateConstraint(Constraint) != - PredicateConstraint::Invalid) - return C_RegisterClass; + } else if (parsePredicateConstraint(Constraint)) + return C_RegisterClass; + else if (parseReducedGprConstraint(Constraint)) + return C_RegisterClass; else if (parseConstraintCode(Constraint) != AArch64CC::Invalid) return C_Other; return TargetLowering::getConstraintType(Constraint); @@ -10096,7 +10331,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( weight = CW_Constant; break; case 'U': - if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid) + if (parsePredicateConstraint(constraint) || + parseReducedGprConstraint(constraint)) weight = CW_Register; break; } @@ -10153,19 +10389,26 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( break; } } else { - PredicateConstraint PC = parsePredicateConstraint(Constraint); - if (PC != PredicateConstraint::Invalid) { - if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) - return std::make_pair(0U, nullptr); - bool restricted = (PC == PredicateConstraint::Upl); - return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass) - : std::make_pair(0U, &AArch64::PPRRegClass); - } + if (const auto PC = parsePredicateConstraint(Constraint)) + if (const auto *RegClass = getPredicateRegisterClass(*PC, VT)) + return std::make_pair(0U, RegClass); + + if (const auto RGC = parseReducedGprConstraint(Constraint)) + if (const auto *RegClass = getReducedGprRegisterClass(*RGC, VT)) + return std::make_pair(0U, RegClass); } if (StringRef("{cc}").equals_insensitive(Constraint) || parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); + if (Constraint == "{za}") { + return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); + } + + if (Constraint == "{zt0}") { + return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); + } + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. 
std::pair<unsigned, const TargetRegisterClass *> Res; @@ -16555,7 +16798,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); - if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) && + N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -24749,8 +24993,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { if (auto *Base = dyn_cast<CallBase>(&Inst)) { auto CallerAttrs = SMEAttrs(*Inst.getFunction()); auto CalleeAttrs = SMEAttrs(*Base); - if (CallerAttrs.requiresSMChange(CalleeAttrs, - /*BodyOverridesInterface=*/false) || + if (CallerAttrs.requiresSMChange(CalleeAttrs) || CallerAttrs.requiresLazySave(CalleeAttrs)) return true; } @@ -24931,7 +25174,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE( EVT VT = Op.getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG); + SDValue Mask = Load->getMask(); + // If this is an extending load and the mask type is not the same as the + // load's type, then we have to extend the mask type. + if (VT.getScalarSizeInBits() > Mask.getValueType().getScalarSizeInBits()) { + assert(Load->getExtensionType() != ISD::NON_EXTLOAD && + "Incorrect mask type"); + Mask = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Mask); + } + Mask = convertFixedMaskToScalableVector(Mask, DAG); SDValue PassThru; bool IsPassThruZeroOrUndef = false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index aca45f113e736679e22961efba1668df7e17ef19..2f6dc303934d1c387d1ced799a9348580d5eef4e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -58,13 +58,8 @@ enum NodeType : unsigned { CALL_BTI, // Function call followed by a BTI instruction. - // Essentially like a normal COPY that works on GPRs, but cannot be - // rematerialised by passes like the simple register coalescer. It's - // required for SME when lowering calls because we cannot allow frame - // index calculations using addvl to slip in between the smstart/smstop - // and the bl instruction. The scalable vector length may change across - // the smstart/smstop boundary.
- OBSCURE_COPY, + COALESCER_BARRIER, + SMSTART, SMSTOP, RESTORE_ZA, @@ -444,6 +439,10 @@ enum NodeType : unsigned { STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPE, + // SME ZA loads and stores + SME_ZA_LDR, + SME_ZA_STR, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, @@ -963,6 +962,9 @@ private: const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; + void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const override; + SDValue LowerCall(CallLoweringInfo & /*CLI*/, SmallVectorImpl &InVals) const override; @@ -971,7 +973,7 @@ private: const SmallVectorImpl &RVLocs, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, - SDValue ThisVal) const; + SDValue ThisVal, bool RequiresSMChange) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; @@ -1238,10 +1240,10 @@ private: // This function does not handle predicate bitcasts. SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; - // Returns the runtime value for PSTATE.SM. When the function is streaming- - // compatible, this generates a call to __arm_sme_state. - SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs, - SDLoc DL, EVT VT) const; + // Returns the runtime value for PSTATE.SM by generating a call to + // __arm_sme_state. + SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, + EVT VT) const; bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1, LLT Ty2) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 39135df285c2382bc8c67dfc5736527e7febe2e0..b645f16d062a506f29238c6133f9b17e65226d44 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1485,7 +1485,7 @@ def UImm3s8Operand : UImmScaledMemoryIndexed<3, 8>; def uimm3s8 : Operand, ImmLeaf= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> { - let PrintMethod = "printVectorIndex<8>"; + let PrintMethod = "printMatrixIndex<8>"; let ParserMatchClass = UImm3s8Operand; } @@ -2670,6 +2670,7 @@ class AddSubImmShift lsl #0, '01' => lsl #12 let Inst{21-10} = imm{11-0}; let DecoderMethod = "DecodeAddSubImmShift"; + let hasPostISelHook = 1; } class BaseAddSubRegPseudo size, // ARMv8.2-A Dot Product Instructions (Vector): These instructions extract // bytes from S-sized elements. 
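// For example, for the vector form
//   sdot v0.4s, v1.16b, v2.16b
// each 32-bit lane of v0 accumulates the dot product of the corresponding
// four byte lanes of v1 and v2 (an illustrative instance, not specific to
// this change).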
-class BaseSIMDThreeSameVectorDot sz, bits<4> opc, string asm, + string kind1, string kind2, RegisterOperand RegType, ValueType AccumType, ValueType InputType, SDPatternOperator OpNode> : - BaseSIMDThreeSameVectorTied { - def v8i8 : BaseSIMDThreeSameVectorDot<0, U, Mixed, asm, ".2s", ".8b", V64, + def v8i8 : BaseSIMDThreeSameVectorDot<0, U, 0b10, {0b001, Mixed}, asm, ".2s", ".8b", V64, v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128, + def v16i8 : BaseSIMDThreeSameVectorDot<1, U, 0b10, {0b001, Mixed}, asm, ".4s", ".16b", V128, v4i32, v16i8, OpNode>; } @@ -8412,12 +8413,12 @@ class SIMDThreeSameVectorMatMul size, string asm, +class BaseSIMDThreeSameVectorIndexS size, bits<4> opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, ValueType AccumType, ValueType InputType, SDPatternOperator OpNode> : - BaseSIMDIndexedTied size, str multiclass SIMDThreeSameVectorDotIndex size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, Mixed, size, asm, ".2s", ".8b", ".4b", + def v8i8 : BaseSIMDThreeSameVectorIndexS<0, U, size, {0b111, Mixed}, asm, ".2s", ".8b", ".4b", V64, v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, Mixed, size, asm, ".4s", ".16b", ".4b", + def v16i8 : BaseSIMDThreeSameVectorIndexS<1, U, size, {0b111, Mixed}, asm, ".4s", ".16b", ".4b", V128, v4i32, v16i8, OpNode>; } // ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) let mayRaiseFPException = 1, Uses = [FPCR] in -class BaseSIMDThreeSameVectorFMLIndex opc, string asm, +class BaseSIMDThreeSameVectorIndexH sz, bits<4> opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDIndexedTied : + BaseSIMDIndexedTied opc, string asm, multiclass SIMDThreeSameVectorFMLIndex opc, string asm, SDPatternOperator OpNode> { - def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h", - V64, v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h", - V128, v4f32, v8f16, OpNode>; + def v4f16 : BaseSIMDThreeSameVectorIndexH<0, U, 0b10, opc, asm, ".2s", ".2h", ".h", + V64, V128_lo, v2f32, v4f16, OpNode>; + def v8f16 : BaseSIMDThreeSameVectorIndexH<1, U, 0b10, opc, asm, ".4s", ".4h", ".h", + V128, V128_lo, v4f32, v8f16, OpNode>; } multiclass SIMDFPIndexed opc, string asm, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 0691e07a639beee77f8096e2cdb1f70e259a0e03..e12dfeca8e24286b2b657761024eb053afe9c3d5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3580,6 +3580,39 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (AArch64::PNRRegClass.contains(DestReg) && + AArch64::PPRRegClass.contains(SrcReg)) { + assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + "Unexpected predicate-as-counter register."); + // Copy from pX to pnX is a no-op + if ((DestReg.id() - AArch64::PN0) == (SrcReg.id() - AArch64::P0)) + return; + MCRegister PPRDestReg = (DestReg - AArch64::PN0) + AArch64::P0; + BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg) + .addReg(SrcReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addDef(DestReg, RegState::Implicit); + return; + } + + if (AArch64::PPRRegClass.contains(DestReg) && + AArch64::PNRRegClass.contains(SrcReg)) 
{ + assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + "Unexpected predicate-as-counter register."); + // Copy from pnX to pX is a no-op + if ((DestReg.id() - AArch64::P0) == (SrcReg.id() - AArch64::PN0)) + return; + MCRegister PNRDestReg = (DestReg - AArch64::P0) + AArch64::PN0; + MCRegister PPRSrcReg = (SrcReg - AArch64::PN0) + AArch64::P0; + BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) + .addReg(PPRSrcReg) + .addReg(PPRSrcReg) + .addReg(PPRSrcReg, getKillRegState(KillSrc)) + .addDef(PNRDestReg, RegState::Implicit); + return; + } + // Copy a Z register by ORRing with itself. if (AArch64::ZPRRegClass.contains(DestReg) && AArch64::ZPRRegClass.contains(SrcReg)) { @@ -3869,6 +3902,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); unsigned Opc = 0; bool Offset = true; + MCRegister PNRReg = MCRegister::NoRegister; unsigned StackID = TargetStackID::Default; switch (TRI->getSpillSize(*RC)) { case 1: @@ -3882,6 +3916,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_PXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) { + assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + "Unexpected register store without SVE2p1 or SME2"); + SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0; + Opc = AArch64::STR_PXI; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -3982,6 +4022,8 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (Offset) MI.addImm(0); + if (PNRReg.isValid()) + MI.addDef(PNRReg, RegState::Implicit); MI.addMemOperand(MMO); } @@ -4026,6 +4068,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned Opc = 0; bool Offset = true; unsigned StackID = TargetStackID::Default; + MCRegister PNRReg = MCRegister::NoRegister; switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) @@ -4038,6 +4081,13 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_PXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) { + assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + "Unexpected register load without SVE2p1 or SME2"); + PNRReg = DestReg; + DestReg = (DestReg - AArch64::PN0) + AArch64::P0; + Opc = AArch64::LDR_PXI; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -4138,6 +4188,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI); if (Offset) MI.addImm(0); + if (PNRReg.isValid()) + MI.addDef(PNRReg, RegState::Implicit); MI.addMemOperand(MMO); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 27a7e26c5e188a1d0894e006b38f8dc281fb4b76..8adceb9ffd1536e38d2a7ae55ecd70dff0ba3640 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1198,7 +1198,7 @@ defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_ne class BaseSIMDSUDOTIndex - : BaseSIMDThreeSameVectorDotIndex { let Pattern = [(set (AccumType RegType:$dst), diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index d82fb436925ec6fa2bd1b7f61bb73b9caba5dc29..7d841cdc22dfe38ff7ddc770a0932f37b99f4e9c 100644 --- 
a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -185,6 +185,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// The frame-index for the TPIDR2 object used for lazy saves. Register LazySaveTPIDR2Obj = 0; + /// Whether this function changes streaming mode anywhere in its body. + bool HasStreamingModeChanges = false; /// True if the function need unwind information. mutable std::optional NeedsDwarfUnwindInfo; @@ -192,6 +194,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// True if the function need asynchronous unwind information. mutable std::optional NeedsAsyncDwarfUnwindInfo; + // Holds a register containing pstate.sm. This is set + // on function entry to record the initial pstate of the function. + Register PStateSMReg = MCRegister::NoRegister; + public: AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI); @@ -200,6 +206,9 @@ public: const DenseMap &Src2DstMBB) const override; + Register getPStateSMReg() const { return PStateSMReg; }; + void setPStateSMReg(Register Reg) { PStateSMReg = Reg; }; + bool isSVECC() const { return IsSVECC; }; void setIsSVECC(bool s) { IsSVECC = s; }; @@ -447,6 +456,11 @@ public: bool needsDwarfUnwindInfo(const MachineFunction &MF) const; bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const; + bool hasStreamingModeChanges() const { return HasStreamingModeChanges; } + void setHasStreamingModeChanges(bool HasChanges) { + HasStreamingModeChanges = HasChanges; + } + private: // Hold the lists of LOHs. MILOHContainer LOHContainerSet; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index d1ddf6d76975476e9ba53bea95ab73e4682591c2..465e4c5b8df58a38ec80a8c532788b92d57bc95c 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -440,6 +440,9 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { Reserved.set(SubReg); } + // VG cannot be allocated + Reserved.set(AArch64::VG); + markSuperRegs(Reserved, AArch64::FPCR); assert(checkAllSuperRegsMarked(Reserved)); @@ -491,6 +494,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16)) return true; + // ZA/ZT0 registers are reserved but may be permitted in the clobber list. + if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0) + return true; + return !isReservedReg(MF, PhysReg); } @@ -987,6 +994,8 @@ bool AArch64RegisterInfo::shouldCoalesce( MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); + if (MI->isCopy() && ((DstRC->getID() == AArch64::GPR64RegClassID) || (DstRC->getID() == AArch64::GPR64commonRegClassID)) && @@ -995,5 +1004,38 @@ bool AArch64RegisterInfo::shouldCoalesce( // which implements a 32 to 64 bit zero extension // which relies on the upper 32 bits being zeroed.
return false; + + auto IsCoalescerBarrier = [](const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AArch64::COALESCER_BARRIER_FPR16: + case AArch64::COALESCER_BARRIER_FPR32: + case AArch64::COALESCER_BARRIER_FPR64: + case AArch64::COALESCER_BARRIER_FPR128: + return true; + default: + return false; + } + }; + + // For calls that temporarily have to toggle streaming mode as part of the + // call sequence, we need to be more careful when coalescing copy + // instructions, so that we don't coalesce the NEON/FP result or argument + // register with a whole Z-register; after such coalescing the register + // allocator would try to spill/reload the entire Z register. + // + // We do this by checking if the node has any defs/uses that are + // COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to + // instruct the coalescer to avoid coalescing the copy. + if (MI->isCopy() && SubReg != DstSubReg && + (AArch64::ZPRRegClass.hasSubClassEq(DstRC) || + AArch64::ZPRRegClass.hasSubClassEq(SrcRC))) { + unsigned SrcReg = MI->getOperand(1).getReg(); + if (any_of(MRI.def_instructions(SrcReg), IsCoalescerBarrier)) + return false; + unsigned DstReg = MI->getOperand(0).getReg(); + if (any_of(MRI.use_nodbg_instructions(DstReg), IsCoalescerBarrier)) + return false; + } + return true; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 4bb1f9413f2ba8b8af6b634f24cefc17abad658c..eeb09389a1b8cbe0d7da030d5ab5df5e709e4bb1 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -55,6 +55,8 @@ let Namespace = "AArch64" in { def zasubd1 : SubRegIndex<256>; // (16 x 16)/8 bytes = 256 bits def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits + + def psub : SubRegIndex<16>; } let Namespace = "AArch64" in { @@ -767,23 +769,43 @@ def GPR64x8 : RegisterOperand { //===----- END: v8.7a accelerator extension register operands -------------===// +// SVE predicate-as-counter registers + def PN0 : AArch64Reg<0, "pn0">, DwarfRegNum<[48]>; + def PN1 : AArch64Reg<1, "pn1">, DwarfRegNum<[49]>; + def PN2 : AArch64Reg<2, "pn2">, DwarfRegNum<[50]>; + def PN3 : AArch64Reg<3, "pn3">, DwarfRegNum<[51]>; + def PN4 : AArch64Reg<4, "pn4">, DwarfRegNum<[52]>; + def PN5 : AArch64Reg<5, "pn5">, DwarfRegNum<[53]>; + def PN6 : AArch64Reg<6, "pn6">, DwarfRegNum<[54]>; + def PN7 : AArch64Reg<7, "pn7">, DwarfRegNum<[55]>; + def PN8 : AArch64Reg<8, "pn8">, DwarfRegNum<[56]>; + def PN9 : AArch64Reg<9, "pn9">, DwarfRegNum<[57]>; + def PN10 : AArch64Reg<10, "pn10">, DwarfRegNum<[58]>; + def PN11 : AArch64Reg<11, "pn11">, DwarfRegNum<[59]>; + def PN12 : AArch64Reg<12, "pn12">, DwarfRegNum<[60]>; + def PN13 : AArch64Reg<13, "pn13">, DwarfRegNum<[61]>; + def PN14 : AArch64Reg<14, "pn14">, DwarfRegNum<[62]>; + def PN15 : AArch64Reg<15, "pn15">, DwarfRegNum<[63]>; + // SVE predicate registers -def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; -def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; -def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>; -def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>; -def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>; -def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>; -def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>; -def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>; -def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>; -def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>; -def P10 : AArch64Reg<10, "p10">,
DwarfRegNum<[58]>; -def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>; -def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>; -def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>; -def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>; -def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>; +let SubRegIndices = [psub] in { + def P0 : AArch64Reg<0, "p0", [PN0]>, DwarfRegAlias; + def P1 : AArch64Reg<1, "p1", [PN1]>, DwarfRegAlias; + def P2 : AArch64Reg<2, "p2", [PN2]>, DwarfRegAlias; + def P3 : AArch64Reg<3, "p3", [PN3]>, DwarfRegAlias; + def P4 : AArch64Reg<4, "p4", [PN4]>, DwarfRegAlias; + def P5 : AArch64Reg<5, "p5", [PN5]>, DwarfRegAlias; + def P6 : AArch64Reg<6, "p6", [PN6]>, DwarfRegAlias; + def P7 : AArch64Reg<7, "p7", [PN7]>, DwarfRegAlias; + def P8 : AArch64Reg<8, "p8", [PN8]>, DwarfRegAlias; + def P9 : AArch64Reg<9, "p9", [PN9]>, DwarfRegAlias; + def P10 : AArch64Reg<10, "p10", [PN10]>, DwarfRegAlias; + def P11 : AArch64Reg<11, "p11", [PN11]>, DwarfRegAlias; + def P12 : AArch64Reg<12, "p12", [PN12]>, DwarfRegAlias; + def P13 : AArch64Reg<13, "p13", [PN13]>, DwarfRegAlias; + def P14 : AArch64Reg<14, "p14", [PN14]>, DwarfRegAlias; + def P15 : AArch64Reg<15, "p15", [PN15]>, DwarfRegAlias; +} // The part of SVE registers that don't overlap Neon registers. // These are only used as part of clobber lists. @@ -881,8 +903,6 @@ class SVERegOp : SVERegOp {} class ZPRRegOp : SVERegOp {} @@ -891,7 +911,7 @@ class ZPRRegOp : RegisterClass< "AArch64", - [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, (sequence "P%u", firstreg, lastreg)> { let Size = 16; } @@ -909,69 +929,89 @@ class PPRAsmOperand : AsmOperandClass { let ParserMethod = "tryParseSVEPredicateVector"; } -def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; -def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; -def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; -def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; -def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; - -def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; -def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; -def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; -def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; -def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; - +def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; +def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; +def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; +def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; +def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; +class PPRRegOp : SVERegOp {} + +def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; +class PNRClass : RegisterClass< + "AArch64", + [ aarch64svcount ], 16, + (sequence "PN%u", firstreg, lastreg)> { + let Size = 16; +} + +def PNR : PNRClass<0, 15>; +def PNR_p8to15 : PNRClass<8, 15>; // SVE predicate-as-counter operand -class PNRAsmOperand - : PPRAsmOperand { +class PNRAsmOperand: AsmOperandClass { + let Name = "SVE" # name # "Reg"; let PredicateMethod = "isSVEPredicateAsCounterRegOfWidth<" # Width # ", " # "AArch64::" # RegClass # 
"RegClassID>"; let DiagnosticType = "InvalidSVE" # name # "Reg"; + let RenderMethod = "addRegOperands"; let ParserMethod = "tryParseSVEPredicateVector"; } -class PNRRegOp - : PPRRegOp { - let PrintMethod = "printPredicateAsCounter<" # EltSize # ">"; +let RenderMethod = "addPNRasPPRRegOperands" in { + def PNRasPPROpAny : PNRAsmOperand<"PNRasPPRPredicateAny", "PNR", 0>; + def PNRasPPROp8 : PNRAsmOperand<"PNRasPPRPredicateB", "PNR", 8>; } -def PNRAsmOpAny: PNRAsmOperand<"PNPredicateAny", "PPR", 0>; -def PNRAsmOp8 : PNRAsmOperand<"PNPredicateB", "PPR", 8>; -def PNRAsmOp16 : PNRAsmOperand<"PNPredicateH", "PPR", 16>; -def PNRAsmOp32 : PNRAsmOperand<"PNPredicateS", "PPR", 32>; -def PNRAsmOp64 : PNRAsmOperand<"PNPredicateD", "PPR", 64>; - -def PNRAny : PNRRegOp<"", PNRAsmOpAny, 0, PPR>; -def PNR8 : PNRRegOp<"b", PNRAsmOp8, 8, PPR>; -def PNR16 : PNRRegOp<"h", PNRAsmOp16, 16, PPR>; -def PNR32 : PNRRegOp<"s", PNRAsmOp32, 32, PPR>; -def PNR64 : PNRRegOp<"d", PNRAsmOp64, 64, PPR>; - -class PNRP8to15RegOp - : PPRRegOp { - let PrintMethod = "printPredicateAsCounter<" # EltSize # ">"; - let EncoderMethod = "EncodePPR_p8to15"; - let DecoderMethod = "DecodePPR_p8to15RegisterClass"; -} - -def PNRAsmAny_p8to15 : PNRAsmOperand<"PNPredicateAny_p8to15", "PPR_p8to15", 0>; -def PNRAsmOp8_p8to15 : PNRAsmOperand<"PNPredicateB_p8to15", "PPR_p8to15", 8>; -def PNRAsmOp16_p8to15 : PNRAsmOperand<"PNPredicateH_p8to15", "PPR_p8to15", 16>; -def PNRAsmOp32_p8to15 : PNRAsmOperand<"PNPredicateS_p8to15", "PPR_p8to15", 32>; -def PNRAsmOp64_p8to15 : PNRAsmOperand<"PNPredicateD_p8to15", "PPR_p8to15", 64>; - -def PNRAny_p8to15 : PNRP8to15RegOp<"", PNRAsmAny_p8to15, 0, PPR_p8to15>; -def PNR8_p8to15 : PNRP8to15RegOp<"b", PNRAsmOp8_p8to15, 8, PPR_p8to15>; -def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PPR_p8to15>; -def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PPR_p8to15>; -def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PPR_p8to15>; +class PNRasPPRRegOp : SVERegOp {} +def PNRasPPRAny : PNRasPPRRegOp<"", PNRasPPROpAny, ElementSizeNone, PPR>; +def PNRasPPR8 : PNRasPPRRegOp<"b", PNRasPPROp8, ElementSizeB, PPR>; + +def PNRAsmOpAny: PNRAsmOperand<"PNPredicateAny", "PNR", 0>; +def PNRAsmOp8 : PNRAsmOperand<"PNPredicateB", "PNR", 8>; +def PNRAsmOp16 : PNRAsmOperand<"PNPredicateH", "PNR", 16>; +def PNRAsmOp32 : PNRAsmOperand<"PNPredicateS", "PNR", 32>; +def PNRAsmOp64 : PNRAsmOperand<"PNPredicateD", "PNR", 64>; + +class PNRRegOp + : SVERegOp { + let PrintMethod = "printPredicateAsCounter<" # Size # ">"; +} +def PNRAny : PNRRegOp<"", PNRAsmOpAny, 0, PNR>; +def PNR8 : PNRRegOp<"b", PNRAsmOp8, 8, PNR>; +def PNR16 : PNRRegOp<"h", PNRAsmOp16, 16, PNR>; +def PNR32 : PNRRegOp<"s", PNRAsmOp32, 32, PNR>; +def PNR64 : PNRRegOp<"d", PNRAsmOp64, 64, PNR>; + +def PNRAsmAny_p8to15 : PNRAsmOperand<"PNPredicateAny_p8to15", "PNR_p8to15", 0>; +def PNRAsmOp8_p8to15 : PNRAsmOperand<"PNPredicateB_p8to15", "PNR_p8to15", 8>; +def PNRAsmOp16_p8to15 : PNRAsmOperand<"PNPredicateH_p8to15", "PNR_p8to15", 16>; +def PNRAsmOp32_p8to15 : PNRAsmOperand<"PNPredicateS_p8to15", "PNR_p8to15", 32>; +def PNRAsmOp64_p8to15 : PNRAsmOperand<"PNPredicateD_p8to15", "PNR_p8to15", 64>; + +class PNRP8to15RegOp + : SVERegOp { + let PrintMethod = "printPredicateAsCounter<" # Width # ">"; + let EncoderMethod = "EncodePNR_p8to15"; + let DecoderMethod = "DecodePNR_p8to15RegisterClass"; +} + +def PNRAny_p8to15 : PNRP8to15RegOp<"", PNRAsmAny_p8to15, 0, PNR_p8to15>; +def PNR8_p8to15 : PNRP8to15RegOp<"b", PNRAsmOp8_p8to15, 8, PNR_p8to15>; +def 
PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PNR_p8to15>; +def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PNR_p8to15>; +def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PNR_p8to15>; let Namespace = "AArch64" in { def psub0 : SubRegIndex<16, -1>; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index cabfe9def7c293e9a6254611f0a1902f4663cf8c..5b4070aaeea98a2c776e5aa46c6a9b68e6e45b32 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -22,8 +22,8 @@ def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect, SDNPVariadic, SDNPOptInGlue]>; - -def AArch64ObscureCopy : SDNode<"AArch64ISD::OBSCURE_COPY", SDTypeProfile<1, 1, []>, []>; +def AArch64CoalescerBarrier + : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, [SDNPOptInGlue, SDNPOutGlue]>; //===----------------------------------------------------------------------===// // Instruction naming conventions. @@ -68,8 +68,8 @@ let Predicates = [HasSME] in { defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>; defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>; -defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>; -defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>; +defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, 0b00, ZPR32, "fmopa", int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, 0b00, ZPR32, "fmops", int_aarch64_sme_mops>; } let Predicates = [HasSMEF64F64] in { @@ -134,52 +134,6 @@ defm ZERO_M : sme_zero<"zero">; // Mode selection and state access instructions //===----------------------------------------------------------------------===// -// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or -// both fields: -// -// MSR SVCRSM, # -// MSR SVCRZA, # -// MSR SVCRSMZA, # -// -// It's tricky to using the existing pstate operand defined in -// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2, -// when these fields are also encoded in CRm[3:1]. -def MSRpstatesvcrImm1 - : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr", - "\t$pstatefield, $imm">, - Sched<[WriteSys]> { - bits<3> pstatefield; - bit imm; - let Inst{18-16} = 0b011; // op1 - let Inst{11-9} = pstatefield; - let Inst{8} = imm; - let Inst{7-5} = 0b011; // op2 -} - -def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; -def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>; -def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>; - -def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>; -def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>; -def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; - - -// Pseudo to match to smstart/smstop. This expands: -// -// pseudonode (pstate_za|pstate_sm), before_call, expected_value -// -// Into: -// -// if (before_call != expected_value) -// node (pstate_za|pstate_sm) -// -// where node can be either 'smstart' or 'smstop'. -def MSRpstatePseudo : - Pseudo<(outs), - (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>, - Sched<[WriteSys]>; - // Pseudo to conditionally restore ZA state. 
This expands: // // pseudonode tpidr2_el0, tpidr2obj, restore_routine @@ -226,52 +180,91 @@ def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 0), (i64 1)), // before def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 0), (i64 1)), // after call (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>; -// The generic case which gets expanded to a pseudo node. -def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)), - (MSRpstatePseudo svcr_op:$pstate, 0b1, GPR64:$rtpstate, timm0_1:$expected_pstate)>; -def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)), - (MSRpstatePseudo svcr_op:$pstate, 0b0, GPR64:$rtpstate, timm0_1:$expected_pstate)>; - // Read and write TPIDR2_EL0 def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val), (MSR 0xde85, GPR64:$val)>; def : Pat<(i64 (int_aarch64_sme_get_tpidr2)), (MRS 0xde85)>; -def OBSCURE_COPY : Pseudo<(outs GPR64:$dst), (ins GPR64:$idx), []>, Sched<[]> { } -def : Pat<(i64 (AArch64ObscureCopy (i64 GPR64:$idx))), - (OBSCURE_COPY GPR64:$idx)>; } // End let Predicates = [HasSME] +multiclass CoalescerBarrierPseudo vts> { + def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> { + let Constraints = "$dst = $src"; + } + foreach vt = vts in { + def : Pat<(vt (AArch64CoalescerBarrier (vt rc:$src))), + (!cast(NAME) rc:$src)>; + } +} + +multiclass CoalescerBarriers { + defm _FPR16 : CoalescerBarrierPseudo; + defm _FPR32 : CoalescerBarrierPseudo; + defm _FPR64 : CoalescerBarrierPseudo; + defm _FPR128 : CoalescerBarrierPseudo; +} + +defm COALESCER_BARRIER : CoalescerBarriers; + +// Pseudo to match to smstart/smstop. This expands: +// +// pseudonode (pstate_za|pstate_sm), before_call, expected_value +// +// Into: +// +// if (before_call != expected_value) +// node (pstate_za|pstate_sm) +// +// where node can be either 'smstart' or 'smstop'. +// +// This pseudo and corresponding patterns don't need to be predicated by SME, +// because when they're emitted for streaming-compatible functions and run +// in a non-SME context the generated code-paths will never execute any +// SME instructions. 
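+// For example (an illustrative expansion; the exact emitted code is
+// target-dependent), a streaming-compatible caller enabling SM around a call
+// to a streaming callee:
+//
+//   bl   __arm_sme_state   // current PSTATE.SM in bit 0 of X0
+//   tbnz x0, #0, 1f        // already in streaming mode, skip the smstart
+//   smstart sm
+// 1:
+//   bl   callee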
+def MSRpstatePseudo : + Pseudo<(outs), + (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>, + Sched<[WriteSys]> { + let hasPostISelHook = 1; + let Uses = [VG]; + let Defs = [VG]; +} + +def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)), + (MSRpstatePseudo svcr_op:$pstate, 0b1, GPR64:$rtpstate, timm0_1:$expected_pstate)>; +def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)), + (MSRpstatePseudo svcr_op:$pstate, 0b0, GPR64:$rtpstate, timm0_1:$expected_pstate)>; + //===----------------------------------------------------------------------===// // SME2 Instructions //===----------------------------------------------------------------------===// let Predicates = [HasSME2] in { defm ADD_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b0011010, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x2>; defm ADD_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b0111010, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x4>; -defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b011010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>; -defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b011010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>; +defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b0110010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>; +defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b0110010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>; defm ADD_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>; defm ADD_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>; defm SUB_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b0011011, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x2>; defm SUB_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b0111011, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x4>; -defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b011011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>; -defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b011011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>; +defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b0110011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>; +defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b0110011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>; defm FMLA_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011000, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x2>; defm FMLA_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111000, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x4>; -defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b011000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>; -defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b011000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>; -defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, 
int_aarch64_sme_fmla_lane_vg1x2>; +defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0110000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>; +defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0110000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>; +defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b01, 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>; defm FMLA_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmla", 0b0000, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x4>; defm FMLS_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011001, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x2>; defm FMLS_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111001, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x4>; -defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b011001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>; -defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b011001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>; -defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>; +defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0110001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>; +defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0110001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>; +defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b01, 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>; defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>; defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>; @@ -295,37 +288,37 @@ defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00, nxv8f16 defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>; defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>; defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>; -defm FMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; -defm FMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; -defm FMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x2>; -defm FMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_vg2x4>; +defm FMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; +defm FMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; +defm FMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x2>; +defm FMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x4>; defm FMLSL_MZZI : 
sme2_mla_long_array_index<"fmlsl", 0b10, 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>; defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x2>; defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>; defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>; -defm FMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; -defm FMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; -defm FMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>; -defm FMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>; +defm FMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b010, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm FMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm FMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b001, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>; +defm FMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b001, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>; defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>; defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>; defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>; defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>; -defm BFMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; -defm BFMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; -defm BFMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>; -defm BFMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>; +defm BFMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b100, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; +defm BFMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b100, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; +defm BFMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>; +defm BFMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>; defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>; defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>; defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>; defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>; -defm BFMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b11, 
nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; -defm BFMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>; -defm BFMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>; -defm BFMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>; +defm BFMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b110, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm BFMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b110, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm BFMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b011, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>; +defm BFMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b011, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>; defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>; defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x2>; @@ -446,122 +439,122 @@ defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>; defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>; defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>; -defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>; +defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>; defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>; defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>; defm FDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>; -defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b010000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>; -defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b010000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>; +defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>; +defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>; -defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>; +defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b01, 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>; defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>; defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>; defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, 
ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>; -defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b010010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>; -defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b010010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>; +defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b0100010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>; +defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b0100010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>; -defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; +defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b01, 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; -defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; +defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b01, 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>; -defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>; -defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>; +defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>; +defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>; defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>; defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>; defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>; defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>; -defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110101, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>; -defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110101, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>; +defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101001, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>; +defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101001, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>; defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>; defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>; -defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b010100, MatrixOp32, 
ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>; -defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b010100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>; +defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b0101000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>; +defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b0101000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>; -defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>; +defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b01, 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>; defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>; defm SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>; defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>; -defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>; +defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b01, 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>; defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>; defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>; -defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>; -defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>; +defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>; +defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>; defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>; defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>; defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>; defm UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>; -defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110111, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>; -defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110111, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>; +defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101011, MatrixOp32, ZZ_h_mul_r, nxv8i16, 
int_aarch64_sme_udot_za32_vg1x2>; +defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101011, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>; defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>; defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>; -defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b010110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>; -defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b010110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>; +defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b0101010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>; +defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b0101010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>; -defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>; +defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b01, 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>; defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>; defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>; defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>; -defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b010101, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>; -defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b010101, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>; +defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b0101001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>; +defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b0101001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>; defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>; -defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>; +defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b01, 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>; defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>; -defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>; -defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>; -defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x4>; -defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, 
ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>; +defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>; +defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>; +defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b00, 0b0000, int_aarch64_sme_smla_za32_lane_vg4x4>; +defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b00000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>; defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>; defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>; -defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>; -defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>; +defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b00000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>; +defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b00000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>; -defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>; -defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>; -defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x4>; -defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>; +defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b00, 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>; +defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b00, 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>; +defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b00, 0b0100, int_aarch64_sme_usmla_za32_lane_vg4x4>; +defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b00001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>; defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>; defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>; -defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>; -defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>; +defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b00001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>; +defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b00001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>; -defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>; -defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 
0b001, int_aarch64_sme_smls_za32_lane_vg4x2>; -defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x4>; -defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>; +defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b00, 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>; +defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b00, 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>; +defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b00, 0b0001, int_aarch64_sme_smls_za32_lane_vg4x4>; +defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b00010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>; defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>; defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>; -defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>; -defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>; +defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b00010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>; +defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b00010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>; -defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>; -defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>; -defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x4>; -defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>; +defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b00, 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>; +defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b00, 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>; +defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b00, 0b0010, int_aarch64_sme_umla_za32_lane_vg4x4>; +defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b00100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>; defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>; defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>; -defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>; -defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>; +defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b00100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>; +defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b00100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, 
int_aarch64_sme_umla_za32_vg4x4>; -defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>; -defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>; -defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x4>; +defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b00, 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>; +defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b00, 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>; +defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b00, 0b0110, int_aarch64_sme_sumla_za32_lane_vg4x4>; defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>; defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>; -defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>; -defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>; -defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x4>; -defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>; +defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b00, 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>; +defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b00, 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>; +defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b00, 0b0011, int_aarch64_sme_umls_za32_lane_vg4x4>; +defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b00110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>; defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>; defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>; -defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>; -defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>; +defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b00110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>; +defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b00110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>; defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>; defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>; @@ -707,13 +700,13 @@ defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1 let Predicates = [HasSME2, HasSMEI16I64] in { defm ADD_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x2>; defm ADD_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b1111010, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, 
int_aarch64_sme_add_write_single_za_vg1x4>; -defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b111010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>; -defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b111010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>; +defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b1110010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>; +defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b1110010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>; defm SUB_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b1011011, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x2>; defm SUB_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x4>; -defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b111011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>; -defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b111011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>; +defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b1110011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>; +defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b1110011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>; defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>; defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>; @@ -725,8 +718,8 @@ defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>; defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>; defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>; -defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>; -defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>; +defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>; +defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>; defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za64_vg1x4>; @@ -734,46 +727,46 @@ defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>; defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, 
int_aarch64_sme_udot_single_za64_vg1x2>; defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>; -defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>; -defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>; +defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>; +defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>; defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>; defm SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>; defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>; defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>; -defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>; +defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b10000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>; defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>; defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>; -defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>; -defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>; +defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b10000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>; +defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b10000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>; defm SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>; defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>; defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>; -defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>; +defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b10010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>; defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>; defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>; -defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r, nxv8i16, 
int_aarch64_sme_smls_za64_vg4x2>; -defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>; +defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b10010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>; +defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b10010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>; defm UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>; defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>; defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>; -defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>; +defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b10100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>; defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>; defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>; -defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>; -defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>; +defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b10100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>; +defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b10100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>; defm UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x1>; defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>; defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>; -defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>; +defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b10110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>; defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>; defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>; -defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>; -defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>; +defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b10110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>; +defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b10110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>; } let Predicates = [HasSME2, HasSMEF64F64] in { @@ 
-781,15 +774,15 @@ defm FMLA_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmla", 0b00, ZZ_d_mu defm FMLA_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmla", 0b000, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x4>; defm FMLA_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b1011000, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x2>; defm FMLA_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b1111000, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x4>; -defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b111000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>; -defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b111000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>; +defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b1110000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>; +defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b1110000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>; defm FMLS_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmls", 0b10, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x2>; defm FMLS_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmls", 0b010, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x4>; defm FMLS_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b1011001, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x2>; defm FMLS_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x4>; -defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b111001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>; -defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b111001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>; +defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b1110001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>; +defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b1110001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>; defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>; defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>; @@ -820,25 +813,25 @@ defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16 defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00>; -defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b00>; +defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00, 0b100, ZZ_h_mul_r, ZPR4b16>; +defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b000, ZZZZ_h_mul_r, ZPR4b16>; defm FMLA_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0011100, MatrixOp16, ZZ_h, ZPR4b16>; defm FMLA_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmla", 0b0111100, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b010001, 
MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b010001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0100001, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0100001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b01>; -defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b01>; +defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b00, 0b101, ZZ_h_mul_r, ZPR4b16>; +defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b001, ZZZZ_h_mul_r, ZPR4b16>; defm FMLS_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0011101, MatrixOp16, ZZ_h, ZPR4b16>; defm FMLS_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg24_single<"fmls", 0b0111101, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b010011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; -defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b010011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0100011, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>; +defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>; defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; +defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } let Predicates = [HasSME2p1, HasB16B16] in { @@ -847,19 +840,19 @@ defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b10>; -defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b10>; +defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16>; +defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16>; defm BFMLA_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1011100, MatrixOp16, ZZ_h, ZPR4b16>; defm BFMLA_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmla", 0b1111100, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b110001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b110001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b1100001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b1100001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b11>; -defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 
0b11>; +defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b00, 0b111, ZZ_h_mul_r, ZPR4b16>; +defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b011, ZZZZ_h_mul_r, ZPR4b16>; defm BFMLS_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1011101, MatrixOp16, ZZ_h, ZPR4b16>; defm BFMLS_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg24_single<"bfmls", 0b1111101, MatrixOp16, ZZZZ_h, ZPR4b16>; -defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b110011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; -defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b110011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b1100011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; +defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b1100011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFMAX_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmax", 0b0010000>; @@ -885,6 +878,6 @@ defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011 defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">; defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; -defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0>; -defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1>; +defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>; +defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>; } diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index b4f02e0dd20374eb58fc3dc34a9dd51226623a0e..1d00f152940a98d5e6b16f72aa9ac061cfdf78fc 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2204,8 +2204,8 @@ let Predicates = [HasSVEorSME] in { } // End HasSVEorSME let Predicates = [HasBF16, HasSVEorSME] in { - defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; - defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; + defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; + defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; } // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { @@ -2544,8 +2544,8 @@ let Predicates = [HasSVEorSME] in { def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>; def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv16i1 (bitconvert (aarch64svcount PPR:$src))), (nxv16i1 PPR:$src)>; - def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PPR:$src)>; + def : Pat<(nxv16i1 (bitconvert (aarch64svcount PNR:$src))), (nxv16i1 PPR:$src)>; + def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PNR:$src)>; } // These allow casting from/to unpacked predicate types. 
@@ -3753,8 +3753,8 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; let Predicates = [HasSVE2p1_or_HasSME2] in { defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>; -defm FDOT_ZZZ_S : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; -defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; +defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; +defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">; def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">; def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">; @@ -3852,9 +3852,9 @@ defm STNT1D_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>; multiclass store_pn_x2<ValueType Ty, SDPatternOperator Store, Instruction RegImmInst> { def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), - (aarch64svcount PPR:$PNg), GPR64:$base), + (aarch64svcount PNR:$PNg), GPR64:$base), (RegImmInst (REG_SEQUENCE ZPR2Mul2, Ty:$vec0, zsub0, Ty:$vec1, zsub1), - PPR:$PNg, GPR64:$base, (i64 0))>; + PNR:$PNg, GPR64:$base, (i64 0))>; } // Stores of 2 consecutive vectors @@ -3878,10 +3878,10 @@ defm : store_pn_x2; multiclass store_pn_x4<ValueType Ty, SDPatternOperator Store, Instruction RegImmInst> { def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), (Ty ZPR:$vec2), (Ty ZPR:$vec3), - (aarch64svcount PPR:$PNg), GPR64:$base), + (aarch64svcount PNR:$PNg), GPR64:$base), (RegImmInst (REG_SEQUENCE ZPR4Mul4, Ty:$vec0, zsub0, Ty:$vec1, zsub1, Ty:$vec2, zsub2, Ty:$vec3, zsub3), - PPR:$PNg, GPR64:$base, (i64 0))>; + PNR:$PNg, GPR64:$base, (i64 0))>; } // Stores of 4 consecutive vectors @@ -3918,26 +3918,28 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; +} // End HasSVE2p1_or_HasSME2 +let Predicates = [HasSVEorSME] in { // Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]", - (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; + (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; def : InstAlias<"ldr $Pt, [$Rn]", - (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>; def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]", - (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; + (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; def : InstAlias<"str $Pt, [$Rn]", - (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>; def : InstAlias<"mov $Pd, $Pn", - (ORR_PPzPP PNR8:$Pd, PNR8:$Pn, PNR8:$Pn, PNR8:$Pn), 0>; + (ORR_PPzPP PNRasPPR8:$Pd, PNRasPPR8:$Pn, PNRasPPR8:$Pn, PNRasPPR8:$Pn), 0>; -def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>; +def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; -} // End HasSVE2p1_or_HasSME2 +} //===----------------------------------------------------------------------===// // SVE2.1 non-widening BFloat16 to BFloat16 instructions diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 559879139758ba6e7fc6cfcf0b5e6498c55e40a2..d2c12e6dfdc7307e9c58ddb18e24115a0fcf87c3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -446,8 +446,6 @@ AArch64TargetMachine::getSubtargetImpl(const Function
&F) const { assert((!StreamingSVEMode || I->hasSME()) && "Expected SME to be available"); - assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) && - "Expected SVE or SME to be available"); return I.get(); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index c60234fd85b51cb3e6d855693b7d6ea63fdf20c9..029b0931eb95039f03f16a89b01290a621e60ab4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -190,16 +190,55 @@ static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode( static cl::opt<bool> EnableScalableAutovecInStreamingMode( "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden); +static bool isSMEABIRoutineCall(const CallInst &CI) { + const auto *F = CI.getCalledFunction(); + return F && StringSwitch<bool>(F->getName()) + .Case("__arm_sme_state", true) + .Case("__arm_tpidr2_save", true) + .Case("__arm_tpidr2_restore", true) + .Case("__arm_za_disable", true) + .Default(false); +} + +/// Returns true if the function has explicit operations that can only be +/// lowered using incompatible instructions for the selected mode. This also +/// returns true if the function F may use or modify ZA state. +static bool hasPossibleIncompatibleOps(const Function *F) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + // Be conservative for now and assume that any call to inline asm or to + // intrinsics could result in non-streaming ops (e.g. calls to + // @llvm.aarch64.* or @llvm.gather/scatter intrinsics). We can assume that + // all native LLVM instructions can be lowered to compatible instructions. + if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() && + (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) || + isSMEABIRoutineCall(cast<CallInst>(I)))) + return true; + } + } + return false; +} + bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { - SMEAttrs CallerAttrs(*Caller); - SMEAttrs CalleeAttrs(*Callee); - if (CallerAttrs.requiresSMChange(CalleeAttrs, - /*BodyOverridesInterface=*/true) || - CallerAttrs.requiresLazySave(CalleeAttrs) || - CalleeAttrs.hasNewZAInterface()) + SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee); + + // When inlining, we should consider the body of the function, not the + // interface.
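+ // Inlining replaces the call with the callee's body, so a callee that is streaming only in its body must be modelled below as streaming-enabled rather than streaming-compatible before the lazy-save and mode-change checks run.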
+ if (CalleeAttrs.hasStreamingBody()) { + CalleeAttrs.set(SMEAttrs::SM_Compatible, false); + CalleeAttrs.set(SMEAttrs::SM_Enabled, true); + } + + if (CalleeAttrs.isNewZA()) return false; + if (CallerAttrs.requiresLazySave(CalleeAttrs) || + CallerAttrs.requiresSMChange(CalleeAttrs)) { + if (hasPossibleIncompatibleOps(Callee)) + return false; + } + const TargetMachine &TM = getTLI()->getTargetMachine(); const FeatureBitset &CallerBits = diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f4b731db05b6e20b2213f5e8311177aa729cb991..139ad21f6ad3f826526a3d02993c14556e63011f 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1229,6 +1229,8 @@ public: case AArch64::PPRRegClassID: case AArch64::PPR_3bRegClassID: case AArch64::PPR_p8to15RegClassID: + case AArch64::PNRRegClassID: + case AArch64::PNR_p8to15RegClassID: RK = RegKind::SVEPredicateAsCounter; break; default: @@ -1249,6 +1251,9 @@ public: break; case AArch64::PPRRegClassID: case AArch64::PPR_3bRegClassID: + case AArch64::PPR_p8to15RegClassID: + case AArch64::PNRRegClassID: + case AArch64::PNR_p8to15RegClassID: RK = RegKind::SVEPredicateVector; break; default: @@ -1733,6 +1738,12 @@ public: Inst.addOperand(MCOperand::createReg(AArch64::Z0 + getReg() - Base)); } + void addPNRasPPRRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand( + MCOperand::createReg((getReg() - AArch64::PN0) + AArch64::P0)); + } + void addVectorReg64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); assert( @@ -2697,22 +2708,22 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) { static unsigned matchSVEPredicateAsCounterRegName(StringRef Name) { return StringSwitch<unsigned>(Name.lower()) - .Case("pn0", AArch64::P0) - .Case("pn1", AArch64::P1) - .Case("pn2", AArch64::P2) - .Case("pn3", AArch64::P3) - .Case("pn4", AArch64::P4) - .Case("pn5", AArch64::P5) - .Case("pn6", AArch64::P6) - .Case("pn7", AArch64::P7) - .Case("pn8", AArch64::P8) - .Case("pn9", AArch64::P9) - .Case("pn10", AArch64::P10) - .Case("pn11", AArch64::P11) - .Case("pn12", AArch64::P12) - .Case("pn13", AArch64::P13) - .Case("pn14", AArch64::P14) - .Case("pn15", AArch64::P15) + .Case("pn0", AArch64::PN0) + .Case("pn1", AArch64::PN1) + .Case("pn2", AArch64::PN2) + .Case("pn3", AArch64::PN3) + .Case("pn4", AArch64::PN4) + .Case("pn5", AArch64::PN5) + .Case("pn6", AArch64::PN6) + .Case("pn7", AArch64::PN7) + .Case("pn8", AArch64::PN8) + .Case("pn9", AArch64::PN9) + .Case("pn10", AArch64::PN10) + .Case("pn11", AArch64::PN11) + .Case("pn12", AArch64::PN12) + .Case("pn13", AArch64::PN13) + .Case("pn14", AArch64::PN14) + .Case("pn15", AArch64::PN15) .Default(0); } @@ -4525,6 +4536,8 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { // Check if register is followed by an index if (parseOptionalToken(AsmToken::LBrac)) { + Operands.push_back( + AArch64Operand::CreateToken("[", getLoc(), getContext())); const MCExpr *ImmVal; if (getParser().parseExpression(ImmVal)) return ParseStatus::NoMatch; @@ -4537,6 +4550,8 @@ ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { Operands.push_back(AArch64Operand::CreateImm( MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateToken("]", getLoc(), getContext())); } return
ParseStatus::Success; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index e50ac5c92d50cfa259bd0a2da13b899fc4a381ee..df817f62f99fbf7be5dc834656032523218adcd8 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -140,11 +140,14 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask, static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); +static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); static DecodeStatus -DecodePPR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, +DecodePNR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); static DecodeStatus DecodePPR2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -736,6 +739,18 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, return Success; } +static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const MCDisassembler *Decoder) { + if (RegNo > 15) + return Fail; + + unsigned Register = + AArch64MCRegisterClasses[AArch64::PNRRegClassID].getRegister(RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, const MCDisassembler *Decoder) { @@ -747,13 +762,13 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodePPR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, +DecodePNR_p8to15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, const MCDisassembler *Decoder) { if (RegNo > 7) return Fail; // Just reuse the PPR decode table - return DecodePPRRegisterClass(Inst, RegNo + 8, Addr, Decoder); + return DecodePNRRegisterClass(Inst, RegNo + 8, Addr, Decoder); } static DecodeStatus DecodePPR2RegisterClass(MCInst &Inst, unsigned RegNo, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index c56e3373d3a7fc7fe799a53e4716ef8becb1a884..4cf7e0bd7be4650bca4e224f56ac985adb474b98 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -532,8 +532,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { } SMEAttrs Attrs(F); - if (Attrs.hasNewZAInterface() || - (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody())) + if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() || + Attrs.hasStreamingCompatibleInterface()) return true; return false; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 2983e9a9be92841e503b89cbed5806f14ded8a7c..016e898bebf932497fbef46cd0e9aeb136625a7a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1335,9 +1335,10 @@ void AArch64InstPrinter::printPredicateAsCounter(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); + if (Reg < 
AArch64::PN0 || Reg > AArch64::PN15) + llvm_unreachable("Unsupported predicate-as-counter register"); + O << "pn" << Reg - AArch64::PN0; - assert(Reg <= AArch64::P15 && "Unsupported predicate register"); - O << "pn" << (Reg - AArch64::P0); switch (EltSize) { case 0: break; @@ -1743,10 +1744,11 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, O << "[" << Scale * MI->getOperand(OpNum).getImm() << "]"; } +template void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - O << MI->getOperand(OpNum).getImm(); + O << Scale * MI->getOperand(OpNum).getImm(); } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index fcaa57402bc2df7760d9d394b5c49e4e66a4ced9..218ddfd918ec7cef8eac27a52173e9f51d809f58 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -172,6 +172,7 @@ protected: template void printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template void printMatrixIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, @@ -191,7 +192,6 @@ protected: template void printPredicateAsCounter(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - template void printComplexRotationOp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 2dbbab13e8f3b9a0b537412a178bcb778854e3a0..d8070e21908a3075d791bddd3b3ef35c245c0640 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -192,6 +192,9 @@ public: uint32_t EncodePPR_p8to15(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + uint32_t EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; uint32_t EncodeZPR2StridedRegisterClass(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, @@ -551,6 +554,14 @@ AArch64MCCodeEmitter::EncodePPR_p8to15(const MCInst &MI, unsigned OpIdx, return RegOpnd - AArch64::P8; } +uint32_t +AArch64MCCodeEmitter::EncodePNR_p8to15(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + auto RegOpnd = MI.getOperand(OpIdx).getReg(); + return RegOpnd - AArch64::PN8; +} + uint32_t AArch64MCCodeEmitter::EncodeZPR2StridedRegisterClass( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp index 83010017c761fdfda385b7a5dbaf8ce4d8439003..8ed33602a96f7b58ee479e32f7b983054dfd9370 100644 --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -41,7 +41,8 @@ struct SMEABI : public FunctionPass { bool runOnFunction(Function &F) override; private: - bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder); + bool updateNewStateFunctions(Module *M, Function *F, IRBuilder<> &Builder, + SMEAttrs FnAttrs); }; } // end anonymous namespace @@ -60,10 +61,8 @@ 
FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); } void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { auto *TPIDR2SaveTy = FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false); - auto Attrs = - AttributeList() - .addFnAttribute(M->getContext(), "aarch64_pstate_sm_compatible") - .addFnAttribute(M->getContext(), "aarch64_pstate_za_preserved"); + auto Attrs = AttributeList().addFnAttribute(M->getContext(), + "aarch64_pstate_sm_compatible"); FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs); CallInst *Call = Builder.CreateCall(Callee); @@ -77,50 +76,80 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { Builder.getInt64(0)); } -/// This function generates code to commit a lazy save at the beginning of a -/// function marked with `aarch64_pstate_za_new`. If the value read from -/// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme -/// is active and we should call __arm_tpidr2_save to commit the lazy save. -/// Additionally, PSTATE.ZA should be enabled at the beginning of the function -/// and disabled before returning. -bool SMEABI::updateNewZAFunctions(Module *M, Function *F, - IRBuilder<> &Builder) { +/// This function generates code at the beginning and end of a function marked +/// with either `aarch64_new_za` or `aarch64_new_zt0`. +/// At the beginning of the function, the following code is generated: +/// - Commit lazy-save if active [Private-ZA Interface*] +/// - Enable PSTATE.ZA [Private-ZA Interface] +/// - Zero ZA [Has New ZA State] +/// - Zero ZT0 [Has New ZT0 State] +/// +/// * A function with new ZT0 state will not change ZA, so committing the +/// lazy-save is not strictly necessary. However, the lazy-save mechanism +/// may be active on entry to the function, with PSTATE.ZA set to 1. If +/// the new ZT0 function calls a function that does not share ZT0, we will +/// need to conditionally SMSTOP ZA before the call, setting PSTATE.ZA to 0. +/// For this reason, it's easier to always commit the lazy-save at the +/// beginning of the function regardless of whether it has ZA state. +/// +/// At the end of the function, PSTATE.ZA is disabled if the function has a +/// Private-ZA Interface. A function is considered to have a Private-ZA +/// interface if it does not share ZA or ZT0. +/// +bool SMEABI::updateNewStateFunctions(Module *M, Function *F, + IRBuilder<> &Builder, SMEAttrs FnAttrs) { LLVMContext &Context = F->getContext(); BasicBlock *OrigBB = &F->getEntryBlock(); - - // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state. - auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true); - auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB); - - // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0. - Builder.SetInsertPoint(PreludeBB); - Function *TPIDR2Intr = - Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2); - auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr, - {}, "tpidr2"); - auto *Cmp = - Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64(0), "cmp"); - Builder.CreateCondBr(Cmp, SaveBB, OrigBB); - - // Create a call __arm_tpidr2_save, which commits the lazy save. - Builder.SetInsertPoint(&SaveBB->back()); - emitTPIDR2Save(M, Builder); - - // Enable pstate.za at the start of the function. 
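[Editor's note: illustrative sketch, not part of the patch. The prelude described in the comment block above is hard to visualize from diff fragments; the following self-contained C++ mirrors what the pass emits to commit an active lazy save, assuming the same intrinsics and runtime routine named in the patch.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Rough equivalent of the entry-block rewrite performed by the pass:
//   prelude:  %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2()
//             %cmp    = icmp ne i64 %tpidr2, 0
//             br i1 %cmp, label %save.za, label %entry
//   save.za:  call void @__arm_tpidr2_save()   ; commit the lazy save
//             br label %entry
static void emitLazySavePrelude(Module *M, Function *F, IRBuilder<> &Builder) {
  BasicBlock *OrigBB = &F->getEntryBlock();
  // Split before the first instruction; SaveBB ends up holding only an
  // unconditional branch to the original entry code.
  BasicBlock *SaveBB =
      OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", /*Before=*/true);
  BasicBlock *PreludeBB =
      BasicBlock::Create(F->getContext(), "prelude", F, SaveBB);
  Builder.SetInsertPoint(PreludeBB);
  Value *TPIDR2 = Builder.CreateCall(
      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2), {},
      "tpidr2");
  Value *Cmp = Builder.CreateICmpNE(TPIDR2, Builder.getInt64(0), "cmp");
  Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
  // Commit the lazy save ahead of SaveBB's fallthrough branch.
  Builder.SetInsertPoint(&SaveBB->back());
  FunctionCallee Save = M->getOrInsertFunction(
      "__arm_tpidr2_save", FunctionType::get(Builder.getVoidTy(), false));
  Builder.CreateCall(Save);
}

The removed code below shows the old unconditional variant of the same scheme; the patch simply gates it on hasPrivateZAInterface() and adds the ZA/ZT0 zeroing steps.]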
Builder.SetInsertPoint(&OrigBB->front()); - Function *EnableZAIntr = - Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable); - Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr); - - // Before returning, disable pstate.za - for (BasicBlock &BB : *F) { - Instruction *T = BB.getTerminator(); - if (!T || !isa(T)) - continue; - Builder.SetInsertPoint(T); - Function *DisableZAIntr = - Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable); - Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr); + + // Commit any active lazy-saves if this is a Private-ZA function. If the + // value read from TPIDR2_EL0 is not null on entry to the function then + // the lazy-saving scheme is active and we should call __arm_tpidr2_save + // to commit the lazy save. + if (FnAttrs.hasPrivateZAInterface()) { + // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state. + auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true); + auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB); + + // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0. + Builder.SetInsertPoint(PreludeBB); + Function *TPIDR2Intr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2); + auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr, + {}, "tpidr2"); + auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, + Builder.getInt64(0), "cmp"); + Builder.CreateCondBr(Cmp, SaveBB, OrigBB); + + // Create a call __arm_tpidr2_save, which commits the lazy save. + Builder.SetInsertPoint(&SaveBB->back()); + emitTPIDR2Save(M, Builder); + + // Enable pstate.za at the start of the function. + Builder.SetInsertPoint(&OrigBB->front()); + Function *EnableZAIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable); + Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr); + } + + if (FnAttrs.isNewZA()) { + Function *ZeroIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero); + Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr, + Builder.getInt32(0xff)); + } + + if (FnAttrs.hasPrivateZAInterface()) { + // Before returning, disable pstate.za + for (BasicBlock &BB : *F) { + Instruction *T = BB.getTerminator(); + if (!T || !isa(T)) + continue; + Builder.SetInsertPoint(T); + Function *DisableZAIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable); + Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr); + } } F->addFnAttr("aarch64_expanded_pstate_za"); @@ -137,8 +166,8 @@ bool SMEABI::runOnFunction(Function &F) { bool Changed = false; SMEAttrs FnAttrs(F); - if (FnAttrs.hasNewZAInterface()) - Changed |= updateNewZAFunctions(M, &F, Builder); + if (FnAttrs.isNewZA()) + Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs); return Changed; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 6e3aadd5dd8cc7be1aebee00bd45b0e33da4ebd1..835e74fdbc6414ad8425dc296bb16c7eb6fb7d5d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -33,6 +33,12 @@ def tileslicerange0s4 : ComplexPattern", []>; def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; +def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; +def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, + [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; +def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore, + [SDNPHasChain, SDNPSideEffect, 
SDNPMayStore]>; + //===----------------------------------------------------------------------===// // SME Pseudo Classes //===----------------------------------------------------------------------===// @@ -190,11 +196,48 @@ class SME_ZA_Tile_TwoPred_TwoVec_Pat(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>; + +//===----------------------------------------------------------------------===// +// SME smstart/smstop +//===----------------------------------------------------------------------===// + +// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or +// both fields: +// +// MSR SVCRSM, # +// MSR SVCRZA, # +// MSR SVCRSMZA, # +// +// It's tricky to using the existing pstate operand defined in +// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2, +// when these fields are also encoded in CRm[3:1]. +def MSRpstatesvcrImm1 + : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr", + "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<3> pstatefield; + bit imm; + let Inst{18-16} = 0b011; // op1 + let Inst{11-9} = pstatefield; + let Inst{8} = imm; + let Inst{7-5} = 0b011; // op2 + let hasPostISelHook = 1; +} + +def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; +def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>; +def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>; + +def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>; +def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>; +def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>; + + //===----------------------------------------------------------------------===// // SME Outer Products //===----------------------------------------------------------------------===// -class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ty, +class sme_fp_outer_product_inst sz, bits<2> op, MatrixTileOperand za_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs za_ty:$ZAda), (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm), @@ -206,7 +249,7 @@ class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ bits<3> Pn; bits<5> Zn; let Inst{31-25} = 0b1000000; - let Inst{24} = op; + let Inst{24} = op{1}; let Inst{23} = 0b1; let Inst{22-21} = sz; let Inst{20-16} = Zm; @@ -214,25 +257,25 @@ class sme_fp_outer_product_inst sz, bit op, MatrixTileOperand za_ let Inst{12-10} = Pn; let Inst{9-5} = Zn; let Inst{4} = S; - let Inst{3} = op; + let Inst{3} = op{0}; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme_outer_product_fp32 { - def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { +multiclass sme_outer_product_fp32 sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } - def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } multiclass sme_outer_product_fp64 { - def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } @@ -242,8 +285,8 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; @@ -743,23 +786,23 @@ class sme_spill_inst : sme_spill_fill_base<0b1, 
(outs), (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), + imm32_0_15:$offset), opcodestr>; let mayLoad = 1 in class sme_fill_inst : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt), (ins MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, - imm0_15:$offset), + imm32_0_15:$offset), opcodestr>; multiclass sme_spill { def NAME : sme_spill_inst; def : InstAlias(NAME) MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; - // base - def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base), - (!cast(NAME) ZA, $idx, 0, $base, 0)>; + + def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)), + (!cast(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>; } multiclass sme_fill { @@ -769,16 +812,15 @@ multiclass sme_fill { MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; def NAME # _PSEUDO : Pseudo<(outs), - (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4, + (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4, GPR64sp:$base), []>, Sched<[]> { // Translated to actual instruction in AArch64ISelLowering.cpp let usesCustomInserter = 1; let mayLoad = 1; } - // base - def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base), - (!cast(NAME # _PSEUDO) $idx, 0, $base)>; + def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm), + (!cast(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>; } //===----------------------------------------------------------------------===// @@ -1297,17 +1339,17 @@ multiclass sve2_int_perm_sel_p { } def : InstAlias(NAME # _B) PNRAny:$Pd, - PNRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>; + (!cast(NAME # _B) PNRasPPRAny:$Pd, + PNRasPPRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>; def : InstAlias(NAME # _H) PNRAny:$Pd, - PNRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>; + (!cast(NAME # _H) PNRasPPRAny:$Pd, + PNRasPPRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>; def : InstAlias(NAME # _S) PNRAny:$Pd, - PNRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>; + (!cast(NAME # _S) PNRasPPRAny:$Pd, + PNRasPPRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>; def : InstAlias(NAME # _D) PNRAny:$Pd, - PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>; + (!cast(NAME # _D) PNRasPPRAny:$Pd, + PNRasPPRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>; def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), MatrixIndexGPR32Op12_15:$idx)), @@ -1413,7 +1455,7 @@ multiclass sme2_dot_mla_add_sub_array_vg4_single op, //===----------------------------------------------------------------------===// // SME2 multiple vectors ternary INT/FP two and four registers -class sme2_dot_mla_add_sub_array_vg2_multi op, +class sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> @@ -1427,20 +1469,19 @@ class sme2_dot_mla_add_sub_array_vg2_multi op, bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; - let Inst{22} = op{5}; //sz + let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; - let Inst{12-10} = op{4-2}; + let Inst{12-10} = 
op{5-3}; let Inst{9-6} = Zn; - let Inst{5} = 0b0; - let Inst{4-3} = op{1-0}; + let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_dot_mla_add_sub_array_vg2_multi op, +multiclass sme2_dot_mla_add_sub_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic> { @@ -1454,7 +1495,7 @@ multiclass sme2_dot_mla_add_sub_array_vg2_multi op, (!cast(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } -class sme2_dot_mla_add_sub_array_vg4_multi op, +class sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, string mnemonic> @@ -1468,20 +1509,20 @@ class sme2_dot_mla_add_sub_array_vg4_multi op, bits<2> Rv; bits<3> imm3; let Inst{31-23} = 0b110000011; - let Inst{22} = op{5}; //sz + let Inst{22} = op{6}; //sz let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; - let Inst{12-10} = op{4-2}; + let Inst{12-10} = op{5-3}; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; - let Inst{4-3} = op{1-0}; + let Inst{6} = 0b0; + let Inst{5-3} = op{2-0}; let Inst{2-0} = imm3; let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_dot_mla_add_sub_array_vg4_multi op, +multiclass sme2_dot_mla_add_sub_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ValueType zpr_ty, SDPatternOperator intrinsic>{ @@ -1758,8 +1799,8 @@ class sme2_mla_long_array_index_base op0, bits<2> op, Operand index_ty, } multiclass sme2_mla_long_array_index op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_index_base, SMEPseudo2Instr { + def _HtoS : sme2_mla_long_array_index_base, SMEPseudo2Instr { bits<3> i3; bits<5> Zn; bits<3> imm; @@ -1769,9 +1810,9 @@ multiclass sme2_mla_long_array_index op0, bits<2> op, V let Inst{2-0} = imm; } - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_Multi_Index_Pat; + def : SME2_ZA_TwoOp_Multi_Index_Pat; } class sme2_mla_long_array_vg2_index op0, bits<2> op> @@ -1789,14 +1830,14 @@ class sme2_mla_long_array_vg2_index op0, bits<2> op> } multiclass sme2_fp_mla_long_array_vg2_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg2_index op, SDPatternOperator intrinsic> { @@ -1825,33 +1866,35 @@ class sme2_mla_long_array_vg4_index op0, bits<2> op> } multiclass sme2_fp_mla_long_array_vg4_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + def : 
SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } multiclass sme2_int_mla_long_array_vg4_index op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } -class sme2_mla_long_arrayop0, bits<2> op, Operand index_ty, +class sme2_mla_long_arrayop0, bits<2> op, + MatrixOperand matrix_ty, + Operand index_ty, RegisterOperand first_vector_ty, RegisterOperand second_vector_ty, string mnemonic, string vg_acronym=""> - : I<(outs MatrixOp32:$ZAda), - (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm), mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm", "", []> , Sched<[]> { @@ -1869,8 +1912,8 @@ class sme2_mla_long_arrayop0, bits<2> op, Operand index_ty, } multiclass sme2_mla_long_array_single op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array , SMEPseudo2Instr{ + def _HtoS : sme2_mla_long_array , SMEPseudo2Instr{ bits<4> Zm; bits<5> Zn; bits<3> imm; @@ -1880,15 +1923,15 @@ multiclass sme2_mla_long_array_single op0, bits<2> op, let Inst{2-0} = imm; } - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_Multi_Single_Pat; + def : SME2_ZA_TwoOp_Multi_Single_Pat; } -class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, - RegisterOperand first_vector_ty, - string mnemonic, string vg_acronym> - : sme2_mla_long_array op0, bit vg4, bits<2> op, bit o2, + MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, + ZPRRegOp zpr_ty, string mnemonic, string vg_acronym> + : sme2_mla_long_array { bits<4> Zm; bits<5> Zn; @@ -1896,96 +1939,117 @@ class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, let Inst{20} = vg4; let Inst{19-16} = Zm; let Inst{9-5} = Zn; - let Inst{2} = 0b0; + let Inst{2} = o2; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic, - "vgx2">, SMEPseudo2Instr; + +multiclass sme2_fp_mla_long_array_vg2_single op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; + def : 
SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_single op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic, - "vgx2">, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic, + "vgx2">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -multiclass sme2_fp_mla_long_array_vg4_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg4_single op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty, + vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_single op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic, + "vgx4">, SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -class sme2_mla_long_array_vg2_multi op0, bits<2> op> - : sme2_mla_long_array { + +class sme2_mla_long_array_vg2_multi op0, bits<3> op, + MatrixOperand matrix_ty, RegisterOperand multi_vector_ty> + : sme2_mla_long_array { bits<4> Zm; bits<4> Zn; bits<2> imm; let Inst{20-17} = Zm; let Inst{16} = 0b0; let Inst{9-6} = Zn; - let Inst{5} = 0b0; + let Inst{5} = op{2}; // fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg2_multi op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, + ValueType zpr_ty, SDPatternOperator intrinsic> { + + def NAME : sme2_mla_long_array_vg2_multi, + 
SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg2_multi op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg2_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; } -class sme2_mla_long_array_vg4_multi op0, bits<2> op> - : sme2_mla_long_array { +class sme2_mla_long_array_vg4_multi op0, bits<3> op, + MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty> + : sme2_mla_long_array { bits<3> Zm; bits<3> Zn; bits<2> imm; @@ -1993,31 +2057,37 @@ class sme2_mla_long_array_vg4_multi op0, bits<2> op> let Inst{17} = 0b0; let Inst{16} = 0b1; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; + let Inst{6} = 0b0; + let Inst{5} = op{2}; //fp8 let Inst{2} = 0b0; let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg4_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; +multiclass sme2_fp_mla_long_array_vg4_multi op, MatrixOperand matrix_ty, + RegisterOperand multi_vector_ty, ValueType zpr_ty, + SDPatternOperator intrinsic> { + def NAME : sme2_mla_long_array_vg4_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; + (!cast(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; } multiclass sme2_int_mla_long_array_vg4_multi op, SDPatternOperator intrinsic> { - def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; + def _HtoS : sme2_mla_long_array_vg4_multi, + SMEPseudo2Instr; - def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; - def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; + (!cast(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; } //===----------------------------------------------------------------------===// @@ -2308,7 +2378,7 @@ multiclass sme2_zip_vector_vg2 { //===----------------------------------------------------------------------===// // SME2 Dot Products and MLA -class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty, +class sme2_multi_vec_array_vg2_index sz, bits<6> op, MatrixOperand matrix_ty, 
RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> @@ -2321,8 +2391,8 @@ class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty bits<2> Rv; bits<4> Zn; bits<3> imm3; - let Inst{31-23} = 0b110000010; - let Inst{22} = sz; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = 0b0; @@ -2336,11 +2406,11 @@ class sme2_multi_vec_array_vg2_index op, MatrixOperand matrix_ty } // SME2 multi-vec ternary indexed two registers 32-bit -multiclass sme2_multi_vec_array_vg2_index_32b op, +multiclass sme2_multi_vec_array_vg2_index_32b sz, bits<4> op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { - def NAME : sme2_multi_vec_array_vg2_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, + def NAME : sme2_multi_vec_array_vg2_index, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; @@ -2356,9 +2426,10 @@ multiclass sme2_multi_vec_array_vg2_index_32b op, } // SME2.1 multi-vec ternary indexed two registers 16-bit -multiclass sme2p1_multi_vec_array_vg2_index_16b op> { - def NAME : sme2_multi_vec_array_vg2_index<0b0, {0b1,?,?,op,?}, MatrixOp16, - ZZ_h_mul_r, ZPR4b16, +multiclass sme2p1_multi_vec_array_vg2_index_16b sz, bits<3> op, + RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { + def NAME : sme2_multi_vec_array_vg2_index { bits<3> i; let Inst{11-10} = i{2-1}; @@ -2366,7 +2437,7 @@ multiclass sme2p1_multi_vec_array_vg2_index_16b op> { } def : InstAlias(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, - ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; + multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec ternary indexed two registers 64-bit @@ -2415,7 +2486,7 @@ multiclass sme2_multi_vec_array_vg2_index_64b op, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; } -class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty, +class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, Operand index_ty, string mnemonic> @@ -2434,10 +2505,9 @@ class sme2_multi_vec_array_vg4_index op, MatrixOperand matrix_ty let Inst{19-16} = Zm; let Inst{15} = 0b1; let Inst{14-13} = Rv; - let Inst{12-10} = op{5-3}; + let Inst{12-10} = op{6-4}; let Inst{9-7} = Zn; - let Inst{6} = 0b0; - let Inst{5-3} = op{2-0}; + let Inst{6-3} = op{3-0}; let Inst{2-0} = imm3; let Constraints = "$ZAda = $_ZAda"; @@ -2448,7 +2518,7 @@ multiclass sme2_multi_vec_array_vg4_index_32b op, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, ValueType vt, SDPatternOperator intrinsic> { - def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, + def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr { bits<2> i; let Inst{11-10} = i; @@ -2464,9 +2534,11 @@ multiclass sme2_multi_vec_array_vg4_index_32b op, } // SME2.1 multi-vec ternary indexed four registers 16-bit -multiclass sme2p1_multi_vec_array_vg4_index_16b op> { +multiclass sme2p1_multi_vec_array_vg4_index_16b op, + RegisterOperand multi_vector_ty, + ZPRRegOp zpr_ty> { def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, - ZZZZ_h_mul_r, ZPR4b16, + multi_vector_ty, zpr_ty, VectorIndexH, mnemonic>{ bits<3> i; let Inst{11-10} = i{2-1}; @@ -2475,7 +2547,7 @@ multiclass sme2p1_multi_vec_array_vg4_index_16b op> { def : InstAlias(NAME) 
MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, - sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; + sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; } // SME2 multi-vec ternary indexed four registers 64-bit @@ -2525,7 +2597,7 @@ multiclass sme2_multi_vec_array_vg4_index_64b op, } //===----------------------------------------------------------------------===// // SME2 multi-vec indexed long long MLA one source 32-bit -class sme2_mla_ll_array_index_32b op> +class sme2_mla_ll_array_index_32b sz, bits<3> op> : I<(outs MatrixOp32:$ZAda), (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", @@ -2535,7 +2607,9 @@ class sme2_mla_ll_array_index_32b op> bits<4> i; bits<5> Zn; bits<2> imm2; - let Inst{31-20} = 0b110000010000; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b00; let Inst{19-16} = Zm; let Inst{15} = i{3}; let Inst{14-13} = Rv; @@ -2547,8 +2621,8 @@ class sme2_mla_ll_array_index_32b op> let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_index_32b op, SDPatternOperator intrinsic> { - def NAME : sme2_mla_ll_array_index_32b, SMEPseudo2Instr; +multiclass sme2_mla_ll_array_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { + def NAME : sme2_mla_ll_array_index_32b, SMEPseudo2Instr; def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; @@ -2589,7 +2663,7 @@ multiclass sme2_mla_ll_array_index_64b op, SDPatternOpe def : SME2_ZA_TwoOp_Multi_Index_Pat; } -class sme2_mla_ll_array_vg24_index_32b op, +class sme2_mla_ll_array_vg24_index_32b sz, bit vg4, bits<3> op, RegisterOperand vector_ty, string mnemonic> : I<(outs MatrixOp32:$ZAda), @@ -2601,7 +2675,9 @@ class sme2_mla_ll_array_vg24_index_32b op, bits<2> Rv; bits<4> i; bit imm; - let Inst{31-20} = 0b110000010001; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b01; let Inst{19-16} = Zm; let Inst{15} = vg4; let Inst{14-13} = Rv; @@ -2616,8 +2692,8 @@ class sme2_mla_ll_array_vg24_index_32b op, //SME2 multi-vec indexed long long MLA two sources 32-bit -multiclass sme2_mla_ll_array_vg2_index_32b op, SDPatternOperator intrinsic> { - def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { +multiclass sme2_mla_ll_array_vg2_index_32b sz, bits<3> op, SDPatternOperator intrinsic> { + def NAME: sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<4> Zn; let Inst{9-6} = Zn; } @@ -2632,11 +2708,11 @@ multiclass sme2_mla_ll_array_vg2_index_32b op, SDPatter // SME2 multi-vec indexed long long MLA four sources 32-bit -multiclass sme2_mla_ll_array_vg4_index_32b op, SDPatternOperator intrinsic> { - def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr { +multiclass sme2_mla_ll_array_vg4_index_32b sz, bits<4> op, SDPatternOperator intrinsic> { + def NAME: sme2_mla_ll_array_vg24_index_32b, SMEPseudo2Instr { bits<3> Zn; let Inst{9-7} = Zn; - let Inst{6} = 0b0; + let Inst{6} = op{3}; } def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; @@ -2708,7 +2784,7 @@ multiclass sme2_mla_ll_array_vg4_index_64b op, SDPatter //SME2 multiple and single vector long long FMA one source -class sme2_mla_ll_array_single op, +class sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty> : I<(outs matrix_ty:$ZAda), @@ -2721,8 +2797,9 @@ class sme2_mla_ll_array_single op, bits<5> Zn; bits<2> imm; let Inst{31-23} = 0b110000010; - let 
Inst{22} = op{3}; //sz - let Inst{21-20} = 0b10; + let Inst{22} = op{4}; //sz + let Inst{21} = 0b1; + let Inst{20} = op{3}; //fp8 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; @@ -2734,7 +2811,7 @@ class sme2_mla_ll_array_single op, let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_single op, +multiclass sme2_mla_ll_array_single op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { def NAME : sme2_mla_ll_array_single, SMEPseudo2Instr; @@ -2744,29 +2821,28 @@ multiclass sme2_mla_ll_array_single op, def : SME2_ZA_TwoOp_Multi_Single_Pat; } -class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ZPRRegOp zpr_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, zpr_ty:$Zm), - mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{3}, "vgx4", "vgx2") # "], $Zn, $Zm", + mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm", "", []>, Sched<[]> { bits<4> Zm; bits<2> Rv; bits<5> Zn; bit imm; let Inst{31-23} = 0b110000010; - let Inst{22} = op{4}; //sz + let Inst{22} = op{5}; //sz let Inst{21} = 0b1; - let Inst{20} = op{3}; //vg4 + let Inst{20} = op{4}; //vg4 let Inst{19-16} = Zm; let Inst{15} = 0b0; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-5} = Zn; - let Inst{4-2} = op{2-0}; - let Inst{1} = 0b0; + let Inst{4-1} = op{3-0}; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; @@ -2774,7 +2850,7 @@ class sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, //SME2 single-multi long long MLA two and four sources -multiclass sme2_mla_ll_array_vg24_single op, +multiclass sme2_mla_ll_array_vg24_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { @@ -2792,7 +2868,7 @@ multiclass sme2_mla_ll_array_vg2_single op, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { - defm NAME: sme2_mla_ll_array_vg24_single; + defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; } @@ -2801,14 +2877,14 @@ multiclass sme2_mla_ll_array_vg4_single op, MatrixOperand matrix_ty, RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> { - defm NAME: sme2_mla_ll_array_vg24_single; + defm NAME: sme2_mla_ll_array_vg24_single; def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; } // SME2 multiple vectors long long MLA two sources -class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty,string mnemonic> : I<(outs matrix_ty:$ZAda), (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, @@ -2820,22 +2896,21 @@ class sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, bits<4> Zn; bit imm; let Inst{31-23} = 0b110000011; - let Inst{22} = op{3}; // sz + let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-17} = Zm; let Inst{16-15} = 0b00; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-6} = Zn; - let Inst{5} = 0b0; - let Inst{4-2} = op{2-0}; + let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_vg2_multi op, +multiclass sme2_mla_ll_array_vg2_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vt, SDPatternOperator intrinsic> { @@ -2851,7 +2926,7 @@ multiclass 
sme2_mla_ll_array_vg2_multi op, // SME2 multiple vectors long long MLA four sources -class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, +class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, RegisterOperand vector_ty, string mnemonic> : I<(outs matrix_ty:$ZAda), @@ -2864,22 +2939,22 @@ class sme2_mla_ll_array_vg4_multi op,MatrixOperand matrix_ty, bits<3> Zn; bit imm; let Inst{31-23} = 0b110000011; - let Inst{22} = op{3}; // sz + let Inst{22} = op{4}; // sz let Inst{21} = 0b1; let Inst{20-18} = Zm; let Inst{17-15} = 0b010; let Inst{14-13} = Rv; let Inst{12-10} = 0b000; let Inst{9-7} = Zn; - let Inst{6-5} = 0b00; - let Inst{4-2} = op{2-0}; + let Inst{6} = 0b0; + let Inst{5-2} = op{3-0}; let Inst{1} = 0b0; let Inst{0} = imm; let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_ll_array_vg4_multi op, +multiclass sme2_mla_ll_array_vg4_multi op, MatrixOperand matrix_ty, RegisterOperand vector_ty, ValueType vt, SDPatternOperator intrinsic> { @@ -2949,7 +3024,7 @@ class sme2_spill_fill_vector opc> // SME2 move to/from lookup table class sme2_movt_zt_to_scalar opc> : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3), - mnemonic, "\t$Rt, $ZTt$imm3", + mnemonic, "\t$Rt, $ZTt[$imm3]", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; @@ -2961,7 +3036,7 @@ class sme2_movt_zt_to_scalar opc> class sme2_movt_scalar_to_zt opc> : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), - mnemonic, "\t$ZTt$imm3, $Rt", + mnemonic, "\t$ZTt[$imm3], $Rt", "", []>, Sched<[]> { bits<3> imm3; bits<5> Rt; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 4902ec3639ec54494b32e1ad08d3f322f3cf3adc..05115aa252a15a02db2d4bf433256868dc6a448a 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -365,6 +365,7 @@ class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let ElementSize = pprty.ElementSize; let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_ptrue opc, string asm, SDPatternOperator op> { @@ -755,6 +756,7 @@ class sve_int_pfalse opc, string asm> let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_pfalse opc, string asm> { @@ -1090,6 +1092,7 @@ class sve_int_count opc, string asm> let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_count opc, string asm, SDPatternOperator op> { @@ -1982,6 +1985,7 @@ class sve_int_dup_mask_imm let DecoderMethod = "DecodeSVELogicalImmInstruction"; let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_mask_imm { @@ -2819,6 +2823,7 @@ class sve_int_arith_vl let Inst{4-0} = Rd; let hasSideEffects = 0; + let Uses = [VG]; } class sve_int_read_vl_a opc2, string asm, bit streaming_sve = 0b0> @@ -2839,6 +2844,7 @@ class sve_int_read_vl_a opc2, string asm, bit streaming_sve = 0b let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } //===----------------------------------------------------------------------===// @@ -4656,6 +4662,7 @@ class sve_int_dup_imm sz8_64, string asm, let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_imm { @@ -4698,6 +4705,7 @@ class sve_int_dup_fpimm sz8_64, Operand fpimmtype, let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_fpimm { @@ -5614,6 +5622,7 @@ class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, let hasSideEffects = 0; let isReMaterializable = 
1; + let Uses = [VG]; } multiclass sve_int_index_ii { @@ -8721,8 +8730,8 @@ multiclass sve2_crypto_unary_op { // SVE BFloat16 Group //===----------------------------------------------------------------------===// -class sve_float_dot -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), +class sve_float_dot +: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src_ty:$Zn, src_ty:$Zm), asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; @@ -8731,7 +8740,8 @@ class sve_float_dot let Inst{22} = bf; let Inst{21} = 0b1; let Inst{20-16} = Zm; - let Inst{15-10} = 0b100000; + let Inst{15-11} = 0b10000; + let Inst{10} = o2; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -8741,24 +8751,24 @@ class sve_float_dot let mayRaiseFPException = 1; } -multiclass sve_float_dot { - def NAME : sve_float_dot; +multiclass sve_float_dot { + def NAME : sve_float_dot; def : SVE_3_Op_Pat(NAME)>; } -class sve_float_dot_indexed -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop), +class sve_float_dot_indexed +: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, src1_ty:$Zn, src2_ty:$Zm, iop_ty:$iop), asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; bits<3> Zm; - bits<2> iop; let Inst{31-23} = 0b011001000; let Inst{22} = bf; let Inst{21} = 0b1; - let Inst{20-19} = iop; let Inst{18-16} = Zm; - let Inst{15-10} = 0b010000; + let Inst{15-12} = 0b0100; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -8768,8 +8778,14 @@ class sve_float_dot_indexed let mayRaiseFPException = 1; } -multiclass sve_float_dot_indexed { - def NAME : sve_float_dot_indexed; +multiclass sve_float_dot_indexed opc, ZPRRegOp src1_ty, + ZPRRegOp src2_ty, string asm, ValueType InVT, + SDPatternOperator op> { + def NAME : sve_float_dot_indexed { + bits<2> iop; + let Inst{20-19} = iop; + let Inst{11-10} = opc; + } def : SVE_4_Op_Imm_Pat(NAME)>; } @@ -9242,6 +9258,7 @@ class sve2p1_ptrue_pn sz, PNRP8to15RegOp pnrty, SDPatte let Inst{2-0} = PNd; let hasSideEffects = 0; + let Uses = [VG]; } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index 0edb7cb986405bb274fd13f0f91984aec39b9e48..cd72944bcdb2a39471955692d74d74c3bea56638 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -20,16 +20,31 @@ void SMEAttrs::set(unsigned M, bool Enable) { assert(!(hasStreamingInterface() && hasStreamingCompatibleInterface()) && "SM_Enabled and SM_Compatible are mutually exclusive"); - assert(!(hasNewZAInterface() && hasSharedZAInterface()) && - "ZA_New and ZA_Shared are mutually exclusive"); - assert(!(hasNewZAInterface() && preservesZA()) && - "ZA_New and ZA_Preserved are mutually exclusive"); + + // ZA Attrs + assert(!(isNewZA() && (Bitmask & SME_ABI_Routine)) && + "ZA_New and SME_ABI_Routine are mutually exclusive"); + + assert( + (!sharesZA() || + (isNewZA() ^ isInZA() ^ isInOutZA() ^ isOutZA() ^ isPreservesZA())) && + "Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', " + "'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive"); } SMEAttrs::SMEAttrs(const CallBase &CB) { *this = SMEAttrs(CB.getAttributes()); - if (auto *F = CB.getCalledFunction()) - set(SMEAttrs(*F).Bitmask); + if (auto *F = CB.getCalledFunction()) { + set(SMEAttrs(*F).Bitmask | SMEAttrs(F->getName()).Bitmask); + } +} + +SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) { + if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state") + Bitmask |= 
(SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine); + if (FuncName == "__arm_tpidr2_restore") + Bitmask |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) | + SMEAttrs::SME_ABI_Routine; } SMEAttrs::SMEAttrs(const AttributeList &Attrs) { @@ -40,35 +55,29 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask |= SM_Compatible; if (Attrs.hasFnAttr("aarch64_pstate_sm_body")) Bitmask |= SM_Body; - if (Attrs.hasFnAttr("aarch64_pstate_za_shared")) - Bitmask |= ZA_Shared; - if (Attrs.hasFnAttr("aarch64_pstate_za_new")) - Bitmask |= ZA_New; - if (Attrs.hasFnAttr("aarch64_pstate_za_preserved")) - Bitmask |= ZA_Preserved; + if (Attrs.hasFnAttr("aarch64_in_za")) + Bitmask |= encodeZAState(StateValue::In); + if (Attrs.hasFnAttr("aarch64_out_za")) + Bitmask |= encodeZAState(StateValue::Out); + if (Attrs.hasFnAttr("aarch64_inout_za")) + Bitmask |= encodeZAState(StateValue::InOut); + if (Attrs.hasFnAttr("aarch64_preserves_za")) + Bitmask |= encodeZAState(StateValue::Preserved); + if (Attrs.hasFnAttr("aarch64_new_za")) + Bitmask |= encodeZAState(StateValue::New); } -std::optional -SMEAttrs::requiresSMChange(const SMEAttrs &Callee, - bool BodyOverridesInterface) const { - // If the transition is not through a call (e.g. when considering inlining) - // and Callee has a streaming body, then we can ignore the interface of - // Callee. - if (BodyOverridesInterface && Callee.hasStreamingBody()) { - return hasStreamingInterfaceOrBody() ? std::nullopt - : std::optional(true); - } - +bool SMEAttrs::requiresSMChange(const SMEAttrs &Callee) const { if (Callee.hasStreamingCompatibleInterface()) - return std::nullopt; + return false; // Both non-streaming if (hasNonStreamingInterfaceAndBody() && Callee.hasNonStreamingInterface()) - return std::nullopt; + return false; // Both streaming if (hasStreamingInterfaceOrBody() && Callee.hasStreamingInterface()) - return std::nullopt; + return false; - return Callee.hasStreamingInterface(); + return true; } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index 1146fd4e3fa8e12c28ee456e2270fc6611d3b026..b57e2176b020739765dd79a553db8ceee0e689bd 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -26,22 +26,31 @@ class SMEAttrs { unsigned Bitmask; public: + enum class StateValue { + None = 0, + In = 1, //aarch64_in_zt0 + Out = 2, //aarch64_out_zt0 + InOut = 3, //aarch64_inout_zt0 + Preserved = 4, //aarch64_preserves_zt0 + New = 5 //aarch64_new_zt0 + }; + // Enum with bitmasks for each individual SME feature. 
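[Editor's note: standalone sketch, not part of the patch. The ZA attributes collapse from three independent flag bits into a single 3-bit StateValue field packed at ZA_Shift. The following compiles on its own and shows the encode/decode round-trip implied by the accessors above, with constants copied from the patch; the Mask enum that carries these constants continues directly below.

#include <cassert>

// StateValue and the shift/mask constants mirror AArch64SMEAttributes.h.
enum class StateValue { None = 0, In, Out, InOut, Preserved, New };
constexpr unsigned ZA_Shift = 4;
constexpr unsigned ZA_Mask = 0b111 << ZA_Shift;

constexpr unsigned encodeZAState(StateValue S) {
  return static_cast<unsigned>(S) << ZA_Shift;
}
constexpr StateValue decodeZAState(unsigned Bitmask) {
  return static_cast<StateValue>((Bitmask & ZA_Mask) >> ZA_Shift);
}

int main() {
  // aarch64_inout_za packs 3 (InOut) into bits [6:4]; decoding recovers it.
  unsigned Bitmask = encodeZAState(StateValue::InOut);
  assert(decodeZAState(Bitmask) == StateValue::InOut);
  // sharesZA() in the patch is then just: decoded state is one of
  // In/Out/InOut/Preserved; isNewZA() is: decoded state == New.
  return 0;
}

]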
enum Mask { Normal = 0, - SM_Enabled = 1 << 0, // aarch64_pstate_sm_enabled - SM_Compatible = 1 << 1, // aarch64_pstate_sm_compatible - SM_Body = 1 << 2, // aarch64_pstate_sm_body - ZA_Shared = 1 << 3, // aarch64_pstate_sm_shared - ZA_New = 1 << 4, // aarch64_pstate_sm_new - ZA_Preserved = 1 << 5, // aarch64_pstate_sm_preserved - All = ZA_Preserved - 1 + SM_Enabled = 1 << 0, // aarch64_pstate_sm_enabled + SM_Compatible = 1 << 1, // aarch64_pstate_sm_compatible + SM_Body = 1 << 2, // aarch64_pstate_sm_body + SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves + ZA_Shift = 4, + ZA_Mask = 0b111 << ZA_Shift, }; SMEAttrs(unsigned Mask = Normal) : Bitmask(0) { set(Mask); } SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {} SMEAttrs(const CallBase &CB); SMEAttrs(const AttributeList &L); + SMEAttrs(StringRef FuncName); void set(unsigned M, bool Enable = true); @@ -63,26 +72,34 @@ public: /// \return true if a call from Caller -> Callee requires a change in /// streaming mode. - /// If \p BodyOverridesInterface is true and Callee has a streaming body, - /// then requiresSMChange considers a call to Callee as having a Streaming - /// interface. This can be useful when considering e.g. inlining, where we - /// explicitly want the body to overrule the interface (because after inlining - /// the interface is no longer relevant). - std::optional - requiresSMChange(const SMEAttrs &Callee, - bool BodyOverridesInterface = false) const; + bool requiresSMChange(const SMEAttrs &Callee) const; - // Interfaces to query PSTATE.ZA - bool hasNewZAInterface() const { return Bitmask & ZA_New; } - bool hasSharedZAInterface() const { return Bitmask & ZA_Shared; } - bool hasPrivateZAInterface() const { return !hasSharedZAInterface(); } - bool preservesZA() const { return Bitmask & ZA_Preserved; } - bool hasZAState() const { - return hasNewZAInterface() || hasSharedZAInterface(); + // Interfaces to query ZA + static StateValue decodeZAState(unsigned Bitmask) { + return static_cast((Bitmask & ZA_Mask) >> ZA_Shift); + } + static unsigned encodeZAState(StateValue S) { + return static_cast(S) << ZA_Shift; } + + bool isNewZA() const { return decodeZAState(Bitmask) == StateValue::New; } + bool isInZA() const { return decodeZAState(Bitmask) == StateValue::In; } + bool isOutZA() const { return decodeZAState(Bitmask) == StateValue::Out; } + bool isInOutZA() const { return decodeZAState(Bitmask) == StateValue::InOut; } + bool isPreservesZA() const { + return decodeZAState(Bitmask) == StateValue::Preserved; + } + bool sharesZA() const { + StateValue State = decodeZAState(Bitmask); + return State == StateValue::In || State == StateValue::Out || + State == StateValue::InOut || State == StateValue::Preserved; + } + bool hasSharedZAInterface() const { return sharesZA(); } + bool hasPrivateZAInterface() const { return !hasSharedZAInterface(); } + bool hasZAState() const { return isNewZA() || sharesZA(); } bool requiresLazySave(const SMEAttrs &Callee) const { return hasZAState() && Callee.hasPrivateZAInterface() && - !Callee.preservesZA(); + !(Callee.Bitmask & SME_ABI_Routine); } }; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index 8cdb0696f7acc5149deb3f638f3bbc071a93e01d..d531c29da7551a96d4f8ceba3c0009fa4fb25848 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -26,7 +26,7 @@ define void 
@asm_simple_register_clobber() { define i64 @asm_register_early_clobber() { ; CHECK-LABEL: name: asm_register_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 2555915 /* regdef-ec:GPR64common */, def early-clobber %0, 2555915 /* regdef-ec:GPR64common */, def early-clobber %1, !0 + ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 2686987 /* regdef-ec:GPR64common */, def early-clobber %0, 2686987 /* regdef-ec:GPR64common */, def early-clobber %1, !0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]] @@ -54,7 +54,7 @@ entry: define i32 @test_single_register_output() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %0 + ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -66,7 +66,7 @@ entry: define i64 @test_single_register_output_s64() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output_s64 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, 2555914 /* regdef:GPR64common */, def %0 + ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, 2686986 /* regdef:GPR64common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 @@ -79,7 +79,7 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %0, 1507338 /* regdef:GPR32common */, def %1 + ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 1638410 /* regdef:GPR32common */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -96,7 +96,7 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %0, 2359306 /* regdef:FPR64 */, def %1 + ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 2490378 /* regdef:FPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) @@ -120,13 +120,12 @@ entry: } define zeroext i8 @test_register_output_trunc(ptr %src) nounwind { - ; ; CHECK-LABEL: name: test_register_output_trunc ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %1 + ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -156,7 +155,7 @@ define void @test_input_register_imm() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[C]](s64) - ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 2555913 /* reguse:GPR64common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 2686985 /* reguse:GPR64common */, [[COPY]] ; CHECK-NEXT: RET_ReallyLR call void asm sideeffect "mov x0, $0", "r"(i64 42) ret void @@ -191,7 +190,7 @@ define zeroext i8 @test_input_register(ptr %src) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) - ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %1, 2555913 /* reguse:GPR64common */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1, 2686985 /* reguse:GPR64common */, [[COPY1]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -208,7 +207,7 @@ define i32 @test_memory_constraint(ptr %a) nounwind { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 1507338 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) + ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 1638410 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -222,7 +221,7 @@ define i16 @test_anyext_input() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1507338 /* regdef:GPR32common */, def %0, 1507337 /* reguse:GPR32common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1638410 /* regdef:GPR32common */, def %0, 1638409 /* reguse:GPR32common */, [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) @@ -238,7 +237,7 @@ define i16 @test_anyext_input_with_matching_constraint() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1507338 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1638410 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll index 
bb7c9e274814ef8acba580c029c91176670fe4f5..c224b0a259fcd18858036a773ef7f3fd0dc3edfd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll @@ -71,7 +71,7 @@ define void @test2() #0 personality ptr @__gcc_personality_v0 { ; CHECK-NEXT: G_INVOKE_REGION_START ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[DEF]](p0) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2555913 /* reguse:GPR64common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2686985 /* reguse:GPR64common */, [[COPY]] ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index 28b56945c7775a980d4c26bea36d415574004257..f75731e351c7826e9a34286dce0dbafc74e998b3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -57,7 +57,7 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_reg_output - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:FPR32 */, def %0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_p8to15_and_PPR2_with_psub1_in_PPR_3b */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -75,7 +75,7 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:FPR32 */, def %0, 2162698 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_p8to15_and_PPR2_with_psub1_in_PPR_3b */, def %0, 2162698 /* regdef:WSeqPairsClass_with_subo32_in_GPR32common */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll new file mode 100644 index 0000000000000000000000000000000000000000..98e7ad740681e68c11bb1ce1f946c715e9149278 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -stop-after=finalize-isel | FileCheck %s + +define dso_local void @UphPNR(target("aarch64.svcount") %predcnt) { +entry: +; CHECK: %0:ppr = COPY $p0 +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) +; CHECK: %1:pnr_p8to15 = COPY %0 +; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 393225 /* reguse:PNR_p8to15 */, %1 +; CHECK: RET_ReallyLR + %predcnt.addr = alloca target("aarch64.svcount"), align 2 + store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2 + %0 = load target("aarch64.svcount"), ptr %predcnt.addr, align 2 + call void asm sideeffect "ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", "@3Uph"(target("aarch64.svcount") %0) + ret void +} + +define dso_local void @UpaPNR(target("aarch64.svcount") %predcnt) { +entry: +; CHECK: %0:ppr = COPY $p0 +; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) +; CHECK: 
%1:pnr = COPY %0
+; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 262153 /* reguse:PNR */, %1
+; CHECK: RET_ReallyLR
+  %predcnt.addr = alloca target("aarch64.svcount"), align 2
+  store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2
+  %0 = load target("aarch64.svcount"), ptr %predcnt.addr, align 2
+  call void asm sideeffect "ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", "@3Upa"(target("aarch64.svcount") %0)
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
index ad1093028c1a67181d167cc2769b376479a2c5da..4ca2fb8815797f02275713f742cfe168ccc2b792 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve -stop-after=finalize-isel | FileCheck %s --check-prefix=CHECK
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
@@ -68,3 +69,16 @@ define <vscale x 4 x i32> @test_incp(<vscale x 16 x i1> %Pg,
   %1 = tail call <vscale x 4 x i32> asm "incp $0.s, $1", "=w,@3Upa,0"(<vscale x 16 x i1> %Pg, <vscale x 4 x i32> %Zn)
   ret <vscale x 4 x i32> %1
 }
+
+; Function Attrs: nounwind readnone
+; CHECK: [[ARG1:%[0-9]+]]:zpr = COPY $z1
+; CHECK: [[ARG2:%[0-9]+]]:zpr = COPY $z0
+; CHECK: [[ARG3:%[0-9]+]]:ppr = COPY $p0
+; CHECK: [[ARG4:%[0-9]+]]:ppr_p8to15 = COPY [[ARG3]]
+; CHECK: INLINEASM {{.*}} [[ARG4]]
+define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 8 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm) {
+  %1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 8 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
+  ret <vscale x 8 x half> %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a8cba7dc9a91e9aed4284bf556babc2d309525f8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s
+
+define void @alpha(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+  ret void
+}
+
+define void @beta(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
index cf22216daf516a45a1a66bcee11f10bca4f666a5..ed02fdfc996a4335be07f3ffdef8428a5b0bd912 100644
--- a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
@@ -18,7 +18,7 @@ define i32 @test0() {
   ; CHECK: bb.0.entry:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000), %bb.1(0x00000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   INLINEASM_BR &"# $0", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1
+  ; CHECK-NEXT:   INLINEASM_BR &"# $0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1
   ; CHECK-NEXT:
[[COPY:%[0-9]+]]:gpr32all = COPY %5 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -31,7 +31,7 @@ define i32 @test0() { ; CHECK-NEXT: bb.2.direct: ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3 + ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %7 ; CHECK-NEXT: B %bb.4 ; CHECK-NEXT: {{ $}} @@ -107,7 +107,7 @@ define i32 @dont_split1() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %1 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -168,7 +168,7 @@ define i32 @dont_split3() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %0, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 13 /* imm */, %bb.2 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.x: @@ -195,7 +195,7 @@ define i32 @split_me0() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -245,7 +245,7 @@ define i32 @split_me1(i1 %z) { ; CHECK-NEXT: bb.1.w: ; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY %5 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -298,7 +298,7 @@ define i32 @split_me2(i1 %z) { ; CHECK-NEXT: bb.1.w: ; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %6 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -341,7 +341,7 @@ define i32 @dont_split4() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -380,7 +380,7 @@ define i32 @dont_split5() { ; CHECK: 
bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -411,7 +411,7 @@ define i32 @split_me3() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -458,7 +458,7 @@ define i32 @dont_split6(i32 %0) { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %2, %bb.2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32common = COPY [[PHI]] - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %4 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -493,7 +493,7 @@ define i32 @split_me4() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -524,7 +524,7 @@ define i32 @split_me5() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1507338 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir index 22cff36afaf7f3e11cf10ae821c1681dd28ef04e..015e8e9ba2b6bb94d97ef66bc7f573c2648df211 100644 --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir @@ -193,7 +193,7 @@ body: | liveins: $z0, $z1, $p0, $p1, $w0 renamable $p2 = COPY killed $p0 - renamable $p0 = PTRUE_S 31 + renamable $p0 = PTRUE_S 31, implicit $vg ST1W_IMM killed renamable $z0, renamable $p0, %stack.0.z0.addr, 0 :: (store unknown-size into %ir.z0.addr, align 16) ST1W_IMM killed renamable $z1, renamable $p0, %stack.1.z1.addr, 0 :: (store unknown-size into %ir.z1.addr, align 16) STR_PXI killed renamable $p2, %stack.2.p0.addr, 0 :: (store unknown-size into %ir.p0.addr, align 2) diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir index 
75917ef32ae2adda4e9f632f2db33b0748247d76..0ea180b20730f2d86120933dc0b580cecb5036f0 100644 --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -111,7 +111,7 @@ body: | STRXui killed renamable $x1, %stack.1, 0, debug-location !8 DBG_VALUE %stack.1, $noreg, !11, !DIExpression(DW_OP_constu, 16, DW_OP_plus, DW_OP_deref), debug-location !8 - renamable $p2 = PTRUE_S 31, debug-location !DILocation(line: 4, column: 1, scope: !5) + renamable $p2 = PTRUE_S 31, implicit $vg, debug-location !DILocation(line: 4, column: 1, scope: !5) ST1W_IMM renamable $z0, renamable $p2, %stack.2, 0, debug-location !DILocation(line: 5, column: 1, scope: !5) DBG_VALUE %stack.2, $noreg, !12, !DIExpression(DW_OP_deref), debug-location !DILocation(line: 5, column: 1, scope: !5) ST1W_IMM renamable $z1, killed renamable $p2, %stack.3, 0, debug-location !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir index 6fe094cc6cbb4e8b71dbdf0b5ac5ff9e347bb8ea..8255c7dd6f1e4d9e118e209490726932142e6bc2 100644 --- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -91,10 +91,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_8_11 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_8_11 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir index 7d7b3ace8a915cdc5a57230a96eb288ebb008dbb..8725bb7061fabef0e5bfccc1cf0072211f6389f7 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -19,8 +19,8 @@ ; CHECK-NEXT: // implicit-def: $p4 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG - ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload + ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 diff --git 
a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 7c87587c6dc4e2c6b2d127b45d43e6d4b47ce49f..eae547fc7033aa5315a9b2fe864e6c23094a59cc 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -771,9 +771,9 @@ body: | # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 +# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 # CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 -# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 @@ -872,14 +872,14 @@ body: | # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4 -# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5 -# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 -# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15 # CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2 # CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3 # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17 +# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4 +# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5 +# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 +# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 @@ -1036,14 +1036,14 @@ body: | # CHECK-NEXT: $sp = ANDXri killed $[[TMP]] # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 +# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3 +# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 # CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5 # CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 # CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15 -# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 -# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3 -# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 -# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10 @@ -1197,10 +1197,10 @@ body: | # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 7 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6 -# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 # CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 1 # CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6 +# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 
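For context on the test added next: the `Uci` and `Ucj` inline-asm constraints restrict a general-purpose operand to the x8-x11 and x12-x15 ranges respectively, which is why the checks below expect w8/x8 and w12/x12. A minimal C++ usage sketch, not part of the patch; the function and variable names are hypothetical:

    // Hypothetical source that Clang would lower to the "@3Uci"/"@3Ucj"
    // constraint strings seen in the IR calls of the test below.
    long add_using_uci(long a) {
      long r;
      // "Uci" forces the input into one of x8-x11.
      asm("add %0, x0, %1" : "=r"(r) : "Uci"(a));
      return r;
    }

    long add_using_ucj(long a) {
      long r;
      // "Ucj" forces the input into one of x12-x15.
      asm("add %0, x0, %1" : "=r"(r) : "Ucj"(a));
      return r;
    }
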
diff --git a/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll b/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll new file mode 100644 index 0000000000000000000000000000000000000000..0bee7ea40cc1aeb460e6854bb63c520e43933362 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/inlineasm-Uc-constraint.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -o - | FileCheck %s + +target triple = "arm64-none-linux-gnu" + +define void @test_constraints_Uci_w(i32 %a) { +; CHECK-LABEL: test_constraints_Uci_w: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i32 %a) + ret void +} + +; As test_constraints_Uci_w but ensures non-legal types are also covered. +define void @test_constraints_Uci_w_i8(i8 %a) { +; CHECK-LABEL: test_constraints_Uci_w_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i8 %a) + ret void +} + +define void @test_constraints_Uci_x(i64 %a) { +; CHECK-LABEL: test_constraints_Uci_x: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Uci,~{x0}"(i64 %a) + ret void +} + +define void @test_constraint_Ucj_w(i32 %a) { +; CHECK-LABEL: test_constraint_Ucj_w: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x12 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i32 %a) + ret void +} + +; As test_constraints_Ucj_w but ensures non-legal types are also covered. 
+define void @test_constraint_Ucj_w_i8(i8 %a) { +; CHECK-LABEL: test_constraint_Ucj_w_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x12 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i8 %a) + ret void +} + +define void @test_constraint_Ucj_x(i64 %a) { +; CHECK-LABEL: test_constraint_Ucj_x: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x12, x0 +; CHECK-NEXT: //APP +; CHECK-NEXT: add x0, x0, x12 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret + call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 %a) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir index 8903ca2b865b98939f83c1d49c37988fc374ad81..612453ab53f4385bc9b5c0978e74da0f8da26b4a 100644 --- a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir +++ b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir @@ -145,7 +145,7 @@ body: | liveins: $z1 ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34 - renamable $p0 = PTRUE_S 31, debug-location !34 + renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34 $x0 = ADDXri %stack.0, 0, 0, debug-location !34 ST1W_IMM renamable $z1, killed renamable $p0, %stack.0, 0, debug-location !34 :: (store unknown-size into %stack.0, align 16) $z0 = COPY renamable $z1, debug-location !34 @@ -157,7 +157,7 @@ body: | $z7 = COPY renamable $z1, debug-location !34 BL @bar, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7, implicit $x0, implicit-def $sp, implicit-def $z0, implicit-def $z1, debug-location !34 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34 - renamable $p0 = PTRUE_S 31, debug-location !34 + renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34 $z3 = IMPLICIT_DEF renamable $z1 = LD1W_IMM renamable $p0, %stack.0, 0, debug-location !34 :: (load unknown-size from %stack.0, align 16) ST1W_IMM renamable $z3, killed renamable $p0, %stack.0, 0 :: (store unknown-size into %stack.0, align 16) diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir index 1c2fe27cdbc397ab36564bf7b6f197f7774f9450..546f2ec0c4171525932ad22073a151feec4d9c51 100644 --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -26,7 +26,6 @@ ret void } - ; The optimization is not applicable when the source is not a virtual register define void @insert_vec_from_gpr(i32 %v, ptr %p) { entry: ret void @@ -488,7 +487,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]] - ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2359306 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_8_11 */, def %1, 262158 /* mem:m */, killed [[COPY1]] ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/preserve.ll b/llvm/test/CodeGen/AArch64/preserve.ll index 924e6f8487ebf0bc4af3db73153ad6c857b6214f..7f57420a5fa1a7e913b704dfed57b99e0d69b99e 
100644 --- a/llvm/test/CodeGen/AArch64/preserve.ll +++ b/llvm/test/CodeGen/AArch64/preserve.ll @@ -4,13 +4,13 @@ target triple = "aarch64-unknown-unknown" declare void @bar1() define preserve_mostcc void @baz() #0 { -; CHECK: baz Clobbered Registers: $ffr $fpcr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $z0_hi $z1_hi $z2_hi $z3_hi $z4_hi $z5_hi $z6_hi $z7_hi $z8_hi $z9_hi $z10_hi $z11_hi $z12_hi $z13_hi $z14_hi $z15_hi $z16_hi $z17_hi $z18_hi $z19_hi $z20_hi $z21_hi $z22_hi $z23_hi $z24_hi $z25_hi $z26_hi $z27_hi $z28_hi $z29_hi $z30_hi $z31_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 
$q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: baz Clobbered Registers: $ffr $fpcr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $z0_hi $z1_hi $z2_hi $z3_hi $z4_hi $z5_hi $z6_hi $z7_hi $z8_hi $z9_hi 
$z10_hi $z11_hi $z12_hi $z13_hi $z14_hi $z15_hi $z16_hi $z17_hi $z18_hi $z19_hi $z20_hi $z21_hi $z22_hi $z23_hi $z24_hi $z25_hi $z26_hi $z27_hi $z28_hi $z29_hi $z30_hi $z31_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 
$z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void } define preserve_allcc void @foo() #0 { -; CHECK: foo Clobbered Registers: $ffr $fpcr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $z0_hi $z1_hi $z2_hi $z3_hi $z4_hi $z5_hi $z6_hi $z7_hi $z8_hi $z9_hi $z10_hi $z11_hi $z12_hi $z13_hi $z14_hi $z15_hi $z16_hi $z17_hi $z18_hi $z19_hi $z20_hi $z21_hi $z22_hi $z23_hi $z24_hi $z25_hi $z26_hi $z27_hi $z28_hi $z29_hi $z30_hi $z31_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 
$q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: foo Clobbered Registers: $ffr $fpcr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $z0_hi $z1_hi $z2_hi $z3_hi $z4_hi $z5_hi $z6_hi $z7_hi $z8_hi $z9_hi $z10_hi $z11_hi $z12_hi $z13_hi $z14_hi $z15_hi $z16_hi $z17_hi $z18_hi $z19_hi $z20_hi $z21_hi $z22_hi $z23_hi $z24_hi $z25_hi $z26_hi $z27_hi $z28_hi $z29_hi $z30_hi $z31_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 
$d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 
$z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd5046a9a64752cea8c8f2c70e4e7d3645685bc0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mattr=+sme -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK-COALESCER-BARRIER +; RUN: llc -mattr=+sme -stop-after=virtregrewriter < %s | FileCheck %s --check-prefix=CHECK-REGALLOC + +target triple = "aarch64" + +define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind { + ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_args + ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0): + ; CHECK-COALESCER-BARRIER-NEXT: liveins: $q0 + ; CHECK-COALESCER-BARRIER-NEXT: {{ $}} + ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY]] + ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg + ; CHECK-COALESCER-BARRIER-NEXT: [[DEF:%[0-9]+]]:zpr = IMPLICIT_DEF + ; CHECK-COALESCER-BARRIER-NEXT: [[INSERT_SUBREG:%[0-9]+]]:zpr = INSERT_SUBREG [[DEF]], [[COALESCER_BARRIER_FPR128_]], %subreg.zsub + ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-COALESCER-BARRIER-NEXT: $z0 = COPY [[INSERT_SUBREG]] + ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp + ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg + ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR + ; + ; CHECK-REGALLOC-LABEL: name: dont_coalesce_args + ; CHECK-REGALLOC: bb.0 (%ir-block.0): + ; CHECK-REGALLOC-NEXT: liveins: $q0 + ; CHECK-REGALLOC-NEXT: {{ $}} + ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0 + ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0) + ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg + ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0) + ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0 + ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp + ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, 
implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-REGALLOC-NEXT: RET_ReallyLR
+  %sa = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %a, i64 0)
+  call void @scalable_args(<vscale x 2 x i64> %sa)
+  ret void
+}
+
+define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
+  ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_res
+  ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0):
+  ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
+  ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY [[COPY]].zsub
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY1]]
+  ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $q0, implicit $vg, implicit-def $vg
+  ; CHECK-COALESCER-BARRIER-NEXT: $q0 = COPY [[COALESCER_BARRIER_FPR128_]]
+  ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR implicit $q0
+  ;
+  ; CHECK-REGALLOC-LABEL: name: dont_coalesce_res
+  ; CHECK-REGALLOC: bb.0 (%ir-block.0):
+  ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
+  ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+  ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+  ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
+  ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
+  %sa = call <vscale x 2 x i64> @scalable_res()
+  %res = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %sa, i64 0)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind {
+  ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_arg_that_is_also_res
+  ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0):
+  ; CHECK-COALESCER-BARRIER-NEXT: liveins: $q0
+  ; CHECK-COALESCER-BARRIER-NEXT: {{ $}}
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY]]
+  ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-COALESCER-BARRIER-NEXT: [[DEF:%[0-9]+]]:zpr = IMPLICIT_DEF
+  ; CHECK-COALESCER-BARRIER-NEXT: [[INSERT_SUBREG:%[0-9]+]]:zpr = INSERT_SUBREG [[DEF]],
[[COALESCER_BARRIER_FPR128_]], %subreg.zsub
+  ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-COALESCER-BARRIER-NEXT: $z0 = COPY [[INSERT_SUBREG]]
+  ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+  ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_1:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COALESCER_BARRIER_FPR128_]]
+  ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $q0, implicit $vg, implicit-def $vg
+  ; CHECK-COALESCER-BARRIER-NEXT: $q0 = COPY [[COALESCER_BARRIER_FPR128_1]]
+  ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR implicit $q0
+  ;
+  ; CHECK-REGALLOC-LABEL: name: dont_coalesce_arg_that_is_also_res
+  ; CHECK-REGALLOC: bb.0 (%ir-block.0):
+  ; CHECK-REGALLOC-NEXT: liveins: $q0
+  ; CHECK-REGALLOC-NEXT: {{ $}}
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+  ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+  ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
+  ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+  ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+  ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+  ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
+  ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
+  %sa = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %a, i64 0)
+  call void @scalable_args(<vscale x 2 x i64> %sa)
+  ret <2 x i64> %a
+}
+
+declare void @scalable_args(<vscale x 2 x i64>) "aarch64_pstate_sm_enabled"
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
+
+declare <vscale x 2 x i64> @scalable_res() "aarch64_pstate_sm_enabled"
+declare <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ec3c972483b4ca781d1a83f9593c0d3c939a8207
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+; Verify that the following code can be compiled without +sme, because if the
+; function is not entered in streaming-SVE mode at runtime, the codepath leading
+; to the smstop/smstart pair will not be executed either.
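Before the test body itself, a minimal LLVM IR sketch of the property being pinned down (illustrative only; the function names are not from this patch). Because the only smstop/smstart instructions sit behind the __arm_sme_state runtime check, both "llc %s" and "llc -mattr=+sme %s" are expected to compile input of this shape:

  ; Sketch, assuming the attribute spelling used throughout this patch:
  define void @sketch() "aarch64_pstate_sm_compatible" nounwind {
    call void @plain_callee()  ; bracketed by a conditional smstop/smstart pair
    ret void
  }
  declare void @plain_callee()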
+ +target triple = "aarch64" + +define void @streaming_compatible() #0 { +; CHECK-LABEL: streaming_compatible: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbz w19, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: bl non_streaming +; CHECK-NEXT: tbz w19, #0, .LBB0_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret + call void @non_streaming() + ret void +} + +declare void @non_streaming() + + +; Verify that COALESCER_BARRIER is also supported without +sme. + +define void @streaming_compatible_arg(float %f) #0 { +; CHECK-LABEL: streaming_compatible_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: tbz w19, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: bl non_streaming +; CHECK-NEXT: tbz w19, #0, .LBB1_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + call void @non_streaming(float %f) + ret void +} + + +attributes #0 = { nounwind "aarch64_pstate_sm_compatible" } diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 0118d7df25640b86b1d8358f07ca8e269d3cb244..94a711ba1c71332e91a631953b55e467a08bb7ab 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -17,15 +17,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: ldr d0, [sp, 
#8] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload ; CHECK-FISEL-NEXT: bl streaming_callee -; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-FISEL-NEXT: smstop sm +; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload ; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0 ; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0] -; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload ; CHECK-FISEL-NEXT: fadd d0, d1, d0 ; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload @@ -43,15 +43,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload ; CHECK-GISEL-NEXT: bl streaming_callee -; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 +; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-GISEL-NEXT: fmov d0, x8 -; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload ; CHECK-GISEL-NEXT: fadd d0, d1, d0 ; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload @@ -68,92 +68,68 @@ entry: define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" { -; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-FISEL: // %bb.0: // %entry -; CHECK-FISEL-NEXT: sub sp, sp, #96 -; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstop sm -; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: bl normal_callee -; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0 -; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] -; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: fadd d0, d1, d0 -; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: add sp, sp, #96 -; CHECK-FISEL-NEXT: ret -; -; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: sub sp, sp, #96 -; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte 
Folded Spill -; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: bl normal_callee -; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 -; CHECK-GISEL-NEXT: fmov d0, x8 -; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: fadd d0, d1, d0 -; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: add sp, sp, #96 -; CHECK-GISEL-NEXT: ret -entry: - %call = call double @normal_callee(double %x) - %add = fadd double %call, 4.200000e+01 - ret double %add -} - -define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" { -; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee: -; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee: +; CHECK-COMMON: // %bb.0: // %entry ; CHECK-COMMON-NEXT: sub sp, sp, #96 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl normal_callee ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 -; CHECK-COMMON-NEXT: fmov d0, x8 ; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 +; CHECK-COMMON-NEXT: fmov d0, x8 ; CHECK-COMMON-NEXT: fadd d0, d1, d0 -; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret +entry: + %call = call double @normal_callee(double %x) + %add = fadd double %call, 4.200000e+01 + ret double %add +} + +define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" { +; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee: +; 
CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: sub sp, sp, #112 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl normal_callee +; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 +; CHECK-COMMON-NEXT: fmov d0, x8 +; CHECK-COMMON-NEXT: fadd d0, d1, d0 +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #112 ; CHECK-COMMON-NEXT: ret %call = call double @normal_callee(double %x); %add = fadd double %call, 4.200000e+01 @@ -223,9 +199,9 @@ define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optno ; Check ZA state ; -declare double @za_shared_callee(double) "aarch64_pstate_za_shared" +declare double @za_shared_callee(double) "aarch64_inout_za" -define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{ +define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_new_za"{ ; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee: ; CHECK-COMMON: // %bb.0: // %prelude ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill @@ -236,6 +212,8 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o ; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 ; CHECK-COMMON-NEXT: mov sp, x8 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16] +; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6] +; CHECK-COMMON-NEXT: stur wzr, [x29, #-4] ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-COMMON-NEXT: cbz x8, .LBB6_2 ; CHECK-COMMON-NEXT: b .LBB6_1 @@ -245,6 +223,7 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o ; CHECK-COMMON-NEXT: b .LBB6_2 ; CHECK-COMMON-NEXT: .LBB6_2: // %entry ; CHECK-COMMON-NEXT: smstart za +; CHECK-COMMON-NEXT: zero {za} ; CHECK-COMMON-NEXT: bl za_shared_callee ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 ; CHECK-COMMON-NEXT: fmov d1, x8 @@ -259,18 +238,19 @@ entry: ret double %add; } -define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{ +define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_inout_za"{ ; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee: ; CHECK-COMMON: // %bb.0: // %entry ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: mov x29, sp
 ; CHECK-COMMON-NEXT: sub sp, sp, #16
 ; CHECK-COMMON-NEXT: rdsvl x8, #1
-; CHECK-COMMON-NEXT: mul x8, x8, x8
 ; CHECK-COMMON-NEXT: mov x9, sp
-; CHECK-COMMON-NEXT: subs x9, x9, x8
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
 ; CHECK-COMMON-NEXT: mov sp, x9
 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
 ; CHECK-COMMON-NEXT: sub x8, x29, #16
 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
@@ -298,20 +278,21 @@ entry:
 }
 
 ; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls.
-define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
+define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
 ; CHECK-COMMON-LABEL: f128_call_za:
 ; CHECK-COMMON: // %bb.0:
 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: mov x29, sp
 ; CHECK-COMMON-NEXT: sub sp, sp, #16
-; CHECK-COMMON-NEXT: rdsvl x8, #1
-; CHECK-COMMON-NEXT: mov x9, sp
-; CHECK-COMMON-NEXT: mul x8, x8, x8
-; CHECK-COMMON-NEXT: sub x9, x9, x8
-; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: mov x8, sp
+; CHECK-COMMON-NEXT: rdsvl x9, #1
+; CHECK-COMMON-NEXT: msub x8, x9, x9, x8
+; CHECK-COMMON-NEXT: mov sp, x8
 ; CHECK-COMMON-NEXT: sub x10, x29, #16
-; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
-; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
+; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
+; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
+; CHECK-COMMON-NEXT: sturh w9, [x29, #-8]
 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
 ; CHECK-COMMON-NEXT: bl __addtf3
 ; CHECK-COMMON-NEXT: smstart za
@@ -340,9 +321,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
 ; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-COMMON-NEXT: bl __addtf3
 ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: smstart sm
@@ -357,3 +338,103 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
 %res = fadd fp128 %a, %b
 ret fp128 %res
 }
+
+; As above this should use Selection DAG to make sure the libcall is lowered correctly.
+define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
+; CHECK-COMMON-LABEL: frem_call_za:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
// 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: sub sp, sp, #16 +; CHECK-COMMON-NEXT: mov x8, sp +; CHECK-COMMON-NEXT: rdsvl x9, #1 +; CHECK-COMMON-NEXT: msub x8, x9, x9, x8 +; CHECK-COMMON-NEXT: mov sp, x8 +; CHECK-COMMON-NEXT: sub x10, x29, #16 +; CHECK-COMMON-NEXT: stur wzr, [x29, #-4] +; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6] +; CHECK-COMMON-NEXT: stur x8, [x29, #-16] +; CHECK-COMMON-NEXT: sturh w9, [x29, #-8] +; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10 +; CHECK-COMMON-NEXT: bl fmod +; CHECK-COMMON-NEXT: smstart za +; CHECK-COMMON-NEXT: sub x0, x29, #16 +; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2 +; CHECK-COMMON-NEXT: // %bb.1: +; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore +; CHECK-COMMON-NEXT: .LBB10_2: +; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr +; CHECK-COMMON-NEXT: mov sp, x29 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret + %res = frem double %a, %b + ret double %res +} + +; As above this should use Selection DAG to make sure the libcall is lowered correctly. +define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind { +; CHECK-COMMON-LABEL: frem_call_sm: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl fmodf +; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret + %res = frem float %a, %b + ret float %res +} + +; As above this should use Selection DAG to make sure the libcall is lowered correctly. 
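A brief aside before the streaming-compatible variant below (a sketch, not part of the patch): what makes frem the interesting case is that no call instruction exists anywhere in the IR; the call to fmodf only comes into existence when SelectionDAG legalizes frem, so the smstop/smstart bracketing (and, for ZA, the lazy save) must also handle calls created by the compiler itself.

  ; Illustrative input shape; the fmodf call is implied, never written:
  define float @sketch_frem(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
    %res = frem float %a, %b   ; legalized to roughly: smstop sm; bl fmodf; smstart sm
    ret float %res
  }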
+define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind { +; CHECK-COMMON-LABEL: frem_call_sm_compat: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: bl __arm_sme_state +; CHECK-COMMON-NEXT: and x19, x0, #0x1 +; CHECK-COMMON-NEXT: ldr s2, [sp, #8] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2 +; CHECK-COMMON-NEXT: // %bb.1: +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: .LBB12_2: +; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl fmodf +; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4 +; CHECK-COMMON-NEXT: // %bb.3: +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: .LBB12_4: +; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret + %res = frem float %a, %b + ret float %res +} diff --git a/llvm/test/CodeGen/AArch64/sme-disable-rematerialize-with-streaming-mode-changes.ll b/llvm/test/CodeGen/AArch64/sme-disable-rematerialize-with-streaming-mode-changes.ll new file mode 100644 index 0000000000000000000000000000000000000000..b3aeb1fcc42da1b6253343349d7652e32387ab18 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-disable-rematerialize-with-streaming-mode-changes.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64" + + +define void @dont_rematerialize_cntd(i32 %N) #0 { +; CHECK-LABEL: dont_rematerialize_cntd: +; CHECK: cntd +; CHECK: smstop sm +; CHECK-NOT: cntd +; CHECK: bl foo +; CHECK: smstart sm +entry: + %cmp2 = icmp sgt i32 %N, 0 + br i1 %cmp2, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %index.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"() nounwind + %.tr = call i32 @llvm.vscale.i32() + %conv = shl nuw nsw i32 %.tr, 4 + call void @foo(i32 %conv) + %inc = add nuw nsw i32 %index.03, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void +} + +; This test doesn't strictly make sense, because it passes a scalable predicate +; to a function, which makes little sense if the VL is not the same in/out of +; streaming-SVE mode. If the VL is known to be the same, then we could just as +; well rematerialize the `ptrue` inside the call sequence. 
However, the purpose
+; of this test is more to ensure that the logic works, which may also trigger
+; when the value is not being passed as argument (e.g. when it is hoisted from
+; a loop and placed inside the call sequence).
+;
+; FIXME: This test also exposes another bug, where the 'mul vl' addressing mode
+; is used before/after the smstop. This will be fixed in a future patch.
+define void @dont_rematerialize_ptrue(i32 %N) #0 {
+; CHECK-LABEL: dont_rematerialize_ptrue:
+; CHECK: ptrue [[PTRUE:p[0-9]+]].b
+; CHECK: str [[PTRUE]], [[[SPILL_ADDR:.*]]]
+; CHECK: smstop sm
+; CHECK: ldr p0, [[[SPILL_ADDR]]]
+; CHECK-NOT: ptrue
+; CHECK: bl bar
+; CHECK: smstart sm
+entry:
+  %cmp2 = icmp sgt i32 %N, 0
+  br i1 %cmp2, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+  %index.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"() nounwind
+  %ptrue.ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
+  %ptrue = shufflevector <vscale x 16 x i1> %ptrue.ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  call void @bar(<vscale x 16 x i1> %ptrue)
+  %inc = add nuw nsw i32 %index.03, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+  ret void
+}
+declare void @foo(i32)
+declare void @bar(<vscale x 16 x i1>)
+declare i32 @llvm.vscale.i32()
+
+attributes #0 = { "aarch64_pstate_sm_enabled" "frame-pointer"="non-leaf" "target-features"="+sme,+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index ccb3975f0c5b422cadce2ef7485948b1c5197a15..985b0581200ff345b59e6696b20b089fb478107b 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -252,7 +252,7 @@ define void @ldr(ptr %ptr) {
 ; CHECK-NEXT: mov w12, wzr
 ; CHECK-NEXT: ldr za[w12, 0], [x0]
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr)
+  call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr, i32 0)
   ret void;
 }
 
@@ -264,7 +264,7 @@ define void @ldr_with_off_15(ptr %ptr) {
 ; CHECK-NEXT: ldr za[w12, 0], [x8]
 ; CHECK-NEXT: ret
   %base = getelementptr i8, ptr %ptr, i64 15
-  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
   ret void;
 }
 
@@ -278,7 +278,7 @@ define void @ldr_with_off_15mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 240
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
   ret void;
 }
 
@@ -292,23 +292,205 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 256
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0)
   ret void;
 }
 
+define void @ldr_with_off_var(ptr %base, i32 %off) {
+; CHECK-LABEL: ldr_with_off_var:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: add w12, w1, #16
+; CHECK-NEXT: madd x8, x9, x8, x0
+; CHECK-NEXT: ldr za[w12, 0], [x8]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off)
+  ret void;
+}
+
+define void @ldr_with_off_15imm(ptr %base) {
+; CHECK-LABEL: ldr_with_off_15imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w12, #16 // =0x10
+; CHECK-NEXT: ldr za[w12, 15],
[x0, #15, mul vl] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15) + ret void; +} + +define void @ldr_with_off_16imm(ptr %base) { +; CHECK-LABEL: ldr_with_off_16imm: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov w12, #32 // =0x20 +; CHECK-NEXT: add x8, x0, x8, lsl #4 +; CHECK-NEXT: ldr za[w12, 0], [x8] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 16) + ret void; +} + +define void @ldr_with_off_many_imm(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: ldr_with_off_many_imm: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: ldr za[w12, 1], [x1, #1, mul vl] +; CHECK-NEXT: ldr za[w12, 2], [x1, #2, mul vl] +; CHECK-NEXT: ldr za[w12, 3], [x1, #3, mul vl] +; CHECK-NEXT: ldr za[w12, 4], [x1, #4, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 1) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 2) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 3) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 4) + ret void +} + +define void @ldr_with_off_many_imm_15_18(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: ldr_with_off_many_imm_15_18: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add x8, x1, x8, lsl #4 +; CHECK-NEXT: add w13, w0, #16 +; CHECK-NEXT: ldr za[w12, 15], [x1, #15, mul vl] +; CHECK-NEXT: ldr za[w13, 0], [x8] +; CHECK-NEXT: ldr za[w13, 1], [x8, #1, mul vl] +; CHECK-NEXT: ldr za[w13, 2], [x8, #2, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 15) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18) + ret void +} + +define void @ldr_with_off_many_imm_16_19(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: ldr_with_off_many_imm_16_19: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #16 +; CHECK-NEXT: add x8, x1, x8, lsl #4 +; CHECK-NEXT: ldr za[w12, 0], [x8] +; CHECK-NEXT: ldr za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 19) + ret void +} + +define void @ldr_with_off_many_imm_31_34(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: ldr_with_off_many_imm_31_34: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #16 +; CHECK-NEXT: add x9, x1, x8, lsl #4 +; CHECK-NEXT: add x8, x1, x8, lsl #5 +; CHECK-NEXT: add w13, w0, #32 +; CHECK-NEXT: ldr za[w12, 15], [x9, #15, mul vl] +; CHECK-NEXT: ldr za[w13, 0], [x8] +; CHECK-NEXT: ldr za[w13, 1], [x8, #1, mul vl] +; CHECK-NEXT: ldr za[w13, 2], [x8, #2, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 31) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34) + ret void +} + +define void 
@ldr_with_off_many_imm_32_35(i32 %tile_slice, ptr %ptr, i64 %vnum) { +; CHECK-LABEL: ldr_with_off_many_imm_32_35: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #32 +; CHECK-NEXT: add x8, x1, x8, lsl #5 +; CHECK-NEXT: ldr za[w12, 0], [x8] +; CHECK-NEXT: ldr za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34) + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 35) + ret void +} + +define void @ldr_with_off_many_var(i32 %tile_slice, ptr %ptr, i64 %vnum) { +; CHECK-LABEL: ldr_with_off_many_var: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtw x8, w2 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: add w12, w0, w2 +; CHECK-NEXT: madd x8, x9, x8, x1 +; CHECK-NEXT: ldr za[w12, 0], [x8] +; CHECK-NEXT: ldr za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl] +; CHECK-NEXT: ret +entry: + %0 = trunc i64 %vnum to i32 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %0) + %1 = add i32 %0, 1 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1) + %2 = add i32 %0, 2 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2) + %3 = add i32 %0, 3 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3) + ret void +} + +define void @ldr_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) { +; CHECK-LABEL: ldr_with_off_many_var_high: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w2, #32 +; CHECK-NEXT: rdsvl x10, #1 +; CHECK-NEXT: sxtw x9, w8 +; CHECK-NEXT: add w12, w0, w8 +; CHECK-NEXT: madd x9, x10, x9, x1 +; CHECK-NEXT: ldr za[w12, 1], [x9, #1, mul vl] +; CHECK-NEXT: ldr za[w12, 2], [x9, #2, mul vl] +; CHECK-NEXT: ldr za[w12, 3], [x9, #3, mul vl] +; CHECK-NEXT: ldr za[w12, 4], [x9, #4, mul vl] +; CHECK-NEXT: ret +entry: + %0 = trunc i64 %vnum to i32 + %1 = add i32 %0, 33 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1) + %2 = add i32 %0, 34 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2) + %3 = add i32 %0, 35 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3) + %4 = add i32 %0, 36 + tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %4) + ret void +} + ; Ensure that the tile offset is sunk, given that this is likely to be an 'add' ; that's decomposed into a base + offset in ISel. 
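To make that sunk-offset shape concrete before the test itself, here is a hypothetical reduction of the kind of input involved (the actual loop body of the test is elided by the diff context that follows):

  ; Sketch: %off is computed once in the entry block; ISel sinks the add next
  ; to each intrinsic so the slice offset folds into the za[w12, 1] form.
  define void @sink_sketch(<vscale x 4 x i1> %pg, ptr %src, i32 %base, i32 %N) "aarch64_pstate_sm_enabled" {
  entry:
    %off = add i32 %base, 1
    br label %for.body
  for.body:
    %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
    call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %off)
    %inc = add i32 %i, 1
    %done = icmp eq i32 %inc, %N
    br i1 %done, label %exit, label %for.body
  exit:
    ret void
  }
  declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)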
 define void @test_ld1_sink_tile0_offset_operand(<vscale x 4 x i1> %pg, ptr %src, i32 %base, i32 %N) {
 ; CHECK-LABEL: test_ld1_sink_tile0_offset_operand:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov w12, w1
-; CHECK-NEXT: .LBB14_1: // %for.body
+; CHECK-NEXT: .LBB24_1: // %for.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ld1w {za0h.s[w12, 0]}, p0/z, [x0]
 ; CHECK-NEXT: subs w2, w2, #1
 ; CHECK-NEXT: ld1w {za0h.s[w12, 1]}, p0/z, [x0]
 ; CHECK-NEXT: ld1w {za0h.s[w12, 2]}, p0/z, [x0]
-; CHECK-NEXT: b.ne .LBB14_1
+; CHECK-NEXT: b.ne .LBB24_1
 ; CHECK-NEXT: // %bb.2: // %exit
 ; CHECK-NEXT: ret
 entry:
@@ -341,5 +523,5 @@ declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
 
-declare void @llvm.aarch64.sme.ldr(i32, ptr)
+declare void @llvm.aarch64.sme.ldr(i32, ptr, i32)
 declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index ddff4c7d3cd3e7a715de8f376ee85f29e145143d..366e24df3e8c975e2668ad8a8ee48c00ed141517 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -252,7 +252,7 @@ define void @str(ptr %ptr) {
 ; CHECK-NEXT: mov w12, wzr
 ; CHECK-NEXT: str za[w12, 0], [x0]
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.str(i32 0, ptr %ptr)
+  call void @llvm.aarch64.sme.str(i32 0, ptr %ptr, i32 0)
   ret void;
 }
 
@@ -264,7 +264,7 @@ define void @str_with_off_15(ptr %ptr) {
 ; CHECK-NEXT: str za[w12, 0], [x8]
 ; CHECK-NEXT: ret
   %base = getelementptr i8, ptr %ptr, i64 15
-  call void @llvm.aarch64.sme.str(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 0)
   ret void;
 }
 
@@ -278,7 +278,7 @@ define void @str_with_off_15mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 240
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.str(i32 15, ptr %base)
+  call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 0)
   ret void;
 }
 
@@ -292,23 +292,210 @@ define void @str_with_off_16mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 256
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.str(i32 16, ptr %base)
+  call void @llvm.aarch64.sme.str(i32 16, ptr %base, i32 0)
   ret void;
 }
 
+define void @str_with_off_var(ptr %base, i32 %off) {
+; CHECK-LABEL: str_with_off_var:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: add w12, w1, #16
+; CHECK-NEXT: madd x8, x9, x8, x0
+; CHECK-NEXT: str za[w12, 0], [x8]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sme.str(i32 16, ptr %base, i32 %off)
+  ret void;
+}
+
+define void @str_with_off_15imm(ptr %ptr) {
+; CHECK-LABEL: str_with_off_15imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w12, #15 // =0xf
+; CHECK-NEXT: add x8, x0, #15
+; CHECK-NEXT: str za[w12, 15], [x8, #15, mul vl]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, ptr %ptr, i64 15
+  call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 15)
+  ret void;
+}
+
+define void @str_with_off_16imm(ptr %ptr) {
+; CHECK-LABEL: str_with_off_16imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov w12, #31 // =0x1f
+; CHECK-NEXT: add x8, x0, x8, lsl #4
+; CHECK-NEXT: add x8, x8, #15
+; CHECK-NEXT: str za[w12, 0], [x8]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, ptr %ptr, i64 15
+  call void @llvm.aarch64.sme.str(i32
15, ptr %base, i32 16) + ret void; +} + +define void @str_with_off_many_imm(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: str_with_off_many_imm: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: str za[w12, 1], [x1, #1, mul vl] +; CHECK-NEXT: str za[w12, 2], [x1, #2, mul vl] +; CHECK-NEXT: str za[w12, 3], [x1, #3, mul vl] +; CHECK-NEXT: str za[w12, 4], [x1, #4, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 1) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 2) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 3) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 4) + ret void +} + +define void @str_with_off_many_imm_15_18(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: str_with_off_many_imm_15_18: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: add x8, x1, x8, lsl #4 +; CHECK-NEXT: add w13, w0, #16 +; CHECK-NEXT: str za[w12, 15], [x1, #15, mul vl] +; CHECK-NEXT: str za[w13, 0], [x8] +; CHECK-NEXT: str za[w13, 1], [x8, #1, mul vl] +; CHECK-NEXT: str za[w13, 2], [x8, #2, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 15) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 16) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 17) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 18) + ret void +} + +define void @str_with_off_many_imm_16_19(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: str_with_off_many_imm_16_19: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #16 +; CHECK-NEXT: add x8, x1, x8, lsl #4 +; CHECK-NEXT: str za[w12, 0], [x8] +; CHECK-NEXT: str za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 16) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 17) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 18) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 19) + ret void +} + +define void @str_with_off_many_imm_31_34(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: str_with_off_many_imm_31_34: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #16 +; CHECK-NEXT: add x9, x1, x8, lsl #4 +; CHECK-NEXT: add x8, x1, x8, lsl #5 +; CHECK-NEXT: add w13, w0, #32 +; CHECK-NEXT: str za[w12, 15], [x9, #15, mul vl] +; CHECK-NEXT: str za[w13, 0], [x8] +; CHECK-NEXT: str za[w13, 1], [x8, #1, mul vl] +; CHECK-NEXT: str za[w13, 2], [x8, #2, mul vl] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 31) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 32) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 33) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 34) + ret void +} + +define void @str_with_off_many_imm_32_35(i32 %tile_slice, ptr %ptr) { +; CHECK-LABEL: str_with_off_many_imm_32_35: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: add w12, w0, #32 +; CHECK-NEXT: add x8, x1, x8, lsl #5 +; CHECK-NEXT: str za[w12, 0], [x8] +; CHECK-NEXT: str za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl] +; 
CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 32) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 33) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 34) + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 35) + ret void +} + +define void @str_with_off_many_var(i32 %tile_slice, ptr %ptr, i64 %vnum) { +; CHECK-LABEL: str_with_off_many_var: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtw x8, w2 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: add w12, w0, w2 +; CHECK-NEXT: madd x8, x9, x8, x1 +; CHECK-NEXT: str za[w12, 0], [x8] +; CHECK-NEXT: str za[w12, 1], [x8, #1, mul vl] +; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl] +; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl] +; CHECK-NEXT: ret +entry: + %0 = trunc i64 %vnum to i32 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %0) + %1 = add i32 %0, 1 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %1) + %2 = add i32 %0, 2 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %2) + %3 = add i32 %0, 3 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %3) + ret void +} + +define void @str_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) { +; CHECK-LABEL: str_with_off_many_var_high: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w2, #32 +; CHECK-NEXT: rdsvl x10, #1 +; CHECK-NEXT: sxtw x9, w8 +; CHECK-NEXT: add w12, w0, w8 +; CHECK-NEXT: madd x9, x10, x9, x1 +; CHECK-NEXT: str za[w12, 1], [x9, #1, mul vl] +; CHECK-NEXT: str za[w12, 2], [x9, #2, mul vl] +; CHECK-NEXT: str za[w12, 3], [x9, #3, mul vl] +; CHECK-NEXT: str za[w12, 4], [x9, #4, mul vl] +; CHECK-NEXT: ret +entry: + %0 = trunc i64 %vnum to i32 + %1 = add i32 %0, 33 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %1) + %2 = add i32 %0, 34 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %2) + %3 = add i32 %0, 35 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %3) + %4 = add i32 %0, 36 + tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %4) + ret void +} + + ; Ensure that the tile offset is sunk, given that this is likely to be an 'add' ; that's decomposed into a base + offset in ISel. 
 define void @test_sink_tile0_offset_operand(<vscale x 4 x i1> %pg, ptr %src, i32 %base, i32 %N) {
 ; CHECK-LABEL: test_sink_tile0_offset_operand:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov w12, w1
-; CHECK-NEXT: .LBB14_1: // %for.body
+; CHECK-NEXT: .LBB24_1: // %for.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0]
 ; CHECK-NEXT: subs w2, w2, #1
 ; CHECK-NEXT: st1w {za0h.s[w12, 1]}, p0, [x0]
 ; CHECK-NEXT: st1w {za0h.s[w12, 2]}, p0, [x0]
-; CHECK-NEXT: b.ne .LBB14_1
+; CHECK-NEXT: b.ne .LBB24_1
 ; CHECK-NEXT: // %bb.2: // %exit
 ; CHECK-NEXT: ret
 entry:
@@ -340,5 +527,5 @@ declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
 declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
 
-declare void @llvm.aarch64.sme.str(i32, ptr)
+declare void @llvm.aarch64.sme.str(i32, ptr, i32)
 declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
new file mode 100644
index 0000000000000000000000000000000000000000..65e50842d5d78fcc07898417b458bd35fde9ce76
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
+
+declare void @private_za_callee()
+declare float @llvm.cos.f32(float)
+
+define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
+  call void @private_za_callee()
+  ret void
+}
+
+define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
+  call void @private_za_callee()
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
+  call void @private_za_callee()
+  ret void
+}
+
+define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
+; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
+  %res = call float @llvm.cos.f32(float %a)
+  ret float %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 00c1c9d66c3e656a9c473a92df61d3928838f62f..a3c3089d54250ccf9c501714aab9f9abb7721191 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -5,20 +5,21 @@ declare void @private_za_callee()
 declare float @llvm.cos.f32(float)
 
 ; Test lazy-save mechanism for a single callee.
-define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
+define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
 ; CHECK-LABEL: test_lazy_save_1_callee:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: stp x29, x30, [sp, #-16]!
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mul x8, x8, x8 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msub x8, x9, x9, x8 +; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: stur x9, [x29, #-16] -; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: stur wzr, [x29, #-4] +; CHECK-NEXT: sturh wzr, [x29, #-6] +; CHECK-NEXT: stur x8, [x29, #-16] +; CHECK-NEXT: sturh w9, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za @@ -37,19 +38,20 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" { } ; Test lazy-save mechanism for multiple callees. -define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" { +define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { ; CHECK-LABEL: test_lazy_save_2_callees: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mul x19, x8, x8 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: sub x8, x8, x19 +; CHECK-NEXT: rdsvl x19, #1 +; CHECK-NEXT: msub x8, x19, x19, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: stur wzr, [x29, #-4] +; CHECK-NEXT: sturh wzr, [x29, #-6] ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: sturh w19, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x20 @@ -83,20 +85,21 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" { } ; Test a call of an intrinsic that gets expanded to a library call. -define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" { +define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" { ; CHECK-LABEL: test_lazy_save_expanded_intrinsic: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mul x8, x8, x8 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msub x8, x9, x9, x8 +; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: stur x9, [x29, #-16] -; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: stur wzr, [x29, #-4] +; CHECK-NEXT: sturh wzr, [x29, #-6] +; CHECK-NEXT: stur x8, [x29, #-16] +; CHECK-NEXT: sturh w9, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: smstart za @@ -115,7 +118,7 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_psta } ; Test a combination of streaming-compatible -> normal call with lazy-save. -define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" { +define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: test_lazy_save_and_conditional_smstart: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill @@ -126,23 +129,24 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z ; CHECK-NEXT: add x29, sp, #64 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mul x8, x8, x8 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msub x8, x9, x9, x8 +; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x10, x29, #80 -; CHECK-NEXT: stur x9, [x29, #-80] -; CHECK-NEXT: sturh w8, [x29, #-72] +; CHECK-NEXT: stur wzr, [x29, #-68] +; CHECK-NEXT: sturh wzr, [x29, #-70] +; CHECK-NEXT: stur x8, [x29, #-80] +; CHECK-NEXT: sturh w9, [x29, #-72] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbz x19, #0, .LBB3_2 +; CHECK-NEXT: tbz w19, #0, .LBB3_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB3_2: ; CHECK-NEXT: bl private_za_callee -; CHECK-NEXT: tbz x19, #0, .LBB3_4 +; CHECK-NEXT: tbz w19, #0, .LBB3_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB3_4: diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll index 54ef5fd432755f98ebe387b5547fc096b0d90d04..04d26902c536a41edc830a7f742594345e23c312 100644 --- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll +++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s ; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s -declare void @shared_za_callee() "aarch64_pstate_za_shared" +declare void @shared_za_callee() "aarch64_inout_za" -define void @private_za() "aarch64_pstate_za_new" { +define void @private_za() "aarch64_new_za" { ; CHECK-LABEL: @private_za( ; CHECK-NEXT: prelude: ; CHECK-NEXT: [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2() @@ -15,6 +15,7 @@ define void @private_za() "aarch64_pstate_za_new" { ; CHECK-NEXT: br label [[TMP0]] ; CHECK: 0: ; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable() +; CHECK-NEXT: call void @llvm.aarch64.sme.zero(i32 255) ; CHECK-NEXT: call void @shared_za_callee() ; CHECK-NEXT: call void @llvm.aarch64.sme.za.disable() ; CHECK-NEXT: ret void @@ -23,7 +24,7 @@ define void @private_za() "aarch64_pstate_za_new" { ret void } -define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_pstate_za_new" { +define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" { ; CHECK-LABEL: @private_za_multiple_exit( ; CHECK-NEXT: prelude: ; CHECK-NEXT: [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2() @@ -35,6 +36,7 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_pstate_ ; CHECK-NEXT: br label [[ENTRY]] ; CHECK: entry: ; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable() +; CHECK-NEXT: call void @llvm.aarch64.sme.zero(i32 255) ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[COND:%.*]], 1 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]] ; CHECK: if.else: @@ -60,4 +62,4 @@ if.end: } ; CHECK: declare void @__arm_tpidr2_save() #[[ATTR:[0-9]+]] -; CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_compatible" "aarch64_pstate_za_preserved" } +; CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_compatible" } diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll 
b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b702c71001a6eaecd3050b09ad45f0790a630c11
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -0,0 +1,1641 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown-eabi-elf"
+
+; This test verifies that call arguments and results are not coalesced
+; with SVE vector registers by the coalescer, such that no 'mul vl'
+; ldr/str pairs are generated in the streaming-mode-changing call
+; sequence.
+
+;
+; Scalar arguments
+;
+
+define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl use_i8
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 16 x i8> poison, i8 %arg, i32 0
+  call void @use_i8(i8 %arg)
+  store <vscale x 16 x i8> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl use_i16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 8 x i16> poison, i16 %arg, i32 0
+  call void @use_i16(i16 %arg)
+  store <vscale x 8 x i16> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl use_i32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 4 x i32> poison, i32 %arg, i32 0
+  call void @use_i32(i32 %arg)
+  store <vscale x 4 x i32> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl use_i64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 2 x i64> poison, i64 %arg, i32 0
+  call void @use_i64(i64 %arg)
+  store <vscale x 2 x i64> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: bl use_f16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 8 x half> poison, half %arg, i32 0
+  call void @use_f16(half %arg)
+  store <vscale x 8 x half> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: bl use_f32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 4 x float> poison, float %arg, i32 0
+  call void @use_f32(float %arg)
+  store <vscale x 4 x float> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_f64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = insertelement <vscale x 2 x double> poison, double %arg, i32 0
+  call void @use_f64(double %arg)
+  store <vscale x 2 x double> %vec, ptr %ptr
+  ret void
+}
+
+
+;
+; Single-element vector arguments
+;
+
+define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v16i8
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1b { z0.b }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x i8> %arg, i32 0
+  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
+  call void @use_v16i8(<1 x i8> %arg)
+  store <vscale x 16 x i8> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v8i16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x i16> %arg, i32 0
+  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
+  call void @use_v8i16(<1 x i16> %arg)
+  store <vscale x 8 x i16> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v4i32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x i32> %arg, i32 0
+  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
+  call void @use_v4i32(<1 x i32> %arg)
+  store <vscale x 4 x i32> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v2i64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x i64> %arg, i32 0
+  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
+  call void @use_v2i64(<1 x i64> %arg)
+  store <vscale x 2 x i64> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: bl use_v8f16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x half> %arg, i32 0
+  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
+  call void @use_v8f16(<1 x half> %arg)
+  store <vscale x 8 x half> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v4f32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x float> %arg, i32 0
+  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
+  call void @use_v4f32(<1 x float> %arg)
+  store <vscale x 4 x float> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v2f64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %elt = extractelement <1 x double> %arg, i32 0
+  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
+  call void @use_v2f64(<1 x double> %arg)
+  store <vscale x 2 x double> %vec, ptr %ptr
+  ret void
+}
+
+;
+; Full vector arguments
+;
+
+define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v16i8
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1b { z0.b }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %arg, i64 0)
+  call void @use_v16i8(<16 x i8> %arg)
+  store <vscale x 16 x i8> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v8i16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %arg, i64 0)
+  call void @use_v8i16(<8 x i16> %arg)
+  store <vscale x 8 x i16> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v4i32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %arg, i64 0)
+  call void @use_v4i32(<4 x i32> %arg)
+  store <vscale x 4 x i32> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v2i64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %arg, i64 0)
+  call void @use_v2i64(<2 x i64> %arg)
+  store <vscale x 2 x i64> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v8f16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %arg, i64 0)
+  call void @use_v8f16(<8 x half> %arg)
+  store <vscale x 8 x half> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v8bf16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> %arg, i64 0)
+  call void @use_v8bf16(<8 x bfloat> %arg)
+  store <vscale x 8 x bfloat> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v4f32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %arg, i64 0)
+  call void @use_v4f32(<4 x float> %arg)
+  store <vscale x 4 x float> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl use_v2f64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %arg, i64 0)
+  call void @use_v2f64(<2 x double> %arg)
+  store <vscale x 2 x double> %vec, ptr %ptr
+  ret void
+}
+
+;
+; <8 x i1> type will need type promotion.
+;
+define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_arg_v8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: and z1.b, z1.b, #0x1
+; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: bl use_v8i1
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ldr p0, [x8, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: str p0, [x19]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %vec = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.v8i1(<vscale x 8 x i1> poison, <8 x i1> %arg, i64 0)
+  call void @use_v8i1(<8 x i1> %arg)
+  store <vscale x 8 x i1> %vec, ptr %ptr
+  ret void
+}
+
+;
+; Scalar return values
+;
+
+define void @dont_coalesce_res_i8(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_i8
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: st1b { z0.b }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %res = call i8 @get_i8()
+  %vec = insertelement <vscale x 16 x i8> poison, i8 %res, i32 0
+  store <vscale x 16 x i8> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_i16(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_i16
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %res = call i16 @get_i16()
+  %vec = insertelement <vscale x 8 x i16> poison, i16 %res, i32 0
+  store <vscale x 8 x i16> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_i32(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_i32
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %res = call i32 @get_i32()
+  %vec = insertelement <vscale x 4 x i32> poison, i32 %res, i32 0
+  store <vscale x 4 x i32> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_i64(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_i64
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %res = call i64 @get_i64()
+  %vec = insertelement <vscale x 2 x i64> poison, i64 %res, i32 0
+  store <vscale x 2 x i64> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_f16(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_f16
+; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call half @get_f16()
+  %vec = insertelement <vscale x 8 x half> poison, half %res, i32 0
+  store <vscale x 8 x half> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_f32(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_f32
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call float @get_f32()
+  %vec = insertelement <vscale x 4 x float> poison, float %res, i32 0
+  store <vscale x 4 x float> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_f64(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_f64
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call double @get_f64()
+  %vec = insertelement <vscale x 2 x double> poison, double %res, i32 0
+  store <vscale x 2 x double> %vec, ptr %ptr
+  ret void
+}
+
+;
+; Single-element vector result values
+;
+
+define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1i8
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1b { z0.b }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x i8> @get_v1i8()
+  %elt = extractelement <1 x i8> %res, i32 0
+  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
+  store <vscale x 16 x i8> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1i16
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x i16> @get_v1i16()
+  %elt = extractelement <1 x i16> %res, i32 0
+  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
+  store <vscale x 8 x i16> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1i32
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x i32> @get_v1i32()
+  %elt = extractelement <1 x i32> %res, i32 0
+  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
+  store <vscale x 4 x i32> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1i64
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x i64> @get_v1i64()
+  %elt = extractelement <1 x i64> %res, i32 0
+  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
+  store <vscale x 2 x i64> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1f16
+; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: st1h { z0.h }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x half> @get_v1f16()
+  %elt = extractelement <1 x half> %res, i32 0
+  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
+  store <vscale x 8 x half> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1f32
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1w { z0.s }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x float> @get_v1f32()
+  %elt = extractelement <1 x float> %res, i32 0
+  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
+  store <vscale x 4 x float> %vec, ptr %ptr
+  ret void
+}
+
+define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
+; CHECK-LABEL: dont_coalesce_res_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl get_v1f64
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+  %res = call <1 x double> @get_v1f64()
+  %elt = extractelement <1 x double> %res, i32 0
+  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
+  store <vscale x 2 x double> %vec, ptr %ptr
+  ret void
+}
+
+;
+; Full vector result values
+;
+
+define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
{ +; CHECK-LABEL: dont_coalesce_res_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v16i8 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1b { z0.b }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <16 x i8> @get_v16i8() + %vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %res, i64 0) + store <vscale x 16 x i8> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v8i16(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v8i16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1h { z0.h }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <8 x i16> @get_v8i16() + %vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %res, i64 0) + store <vscale x 8 x i16> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v4i32(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v4i32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1w { z0.s }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +;
CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <4 x i32> @get_v4i32() + %vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %res, i64 0) + store <vscale x 4 x i32> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v2i64(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v2i64 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1d { z0.d }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <2 x i64> @get_v2i64() + %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %res, i64 0) + store <vscale x 2 x i64> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v8f16(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v8f16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1h { z0.h }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <8 x half> @get_v8f16() + %vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %res, i64 0) + store <vscale x 8 x half> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v4f32(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v4f32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +;
CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1w { z0.s }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <4 x float> @get_v4f32() + %vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %res, i64 0) + store <vscale x 4 x float> %vec, ptr %ptr + ret void +} + +define void @dont_coalesce_res_v2f64(ptr %ptr) #0 { +; CHECK-LABEL: dont_coalesce_res_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl get_v2f64 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: st1d { z0.d }, p0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call <2 x double> @get_v2f64() + %vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %res, i64 0) + store <vscale x 2 x double> %vec, ptr %ptr + ret void +} + +declare half @get_f16() +declare float @get_f32() +declare double @get_f64() +declare <1 x half> @get_v1f16() +declare <1 x float> @get_v1f32() +declare <1 x double> @get_v1f64() +declare <8 x half> @get_v8f16() +declare <4 x float> @get_v4f32() +declare <2 x double> @get_v2f64() + +declare i8 @get_i8() +declare i16 @get_i16() +declare i32 @get_i32() +declare i64 @get_i64() +declare <1 x i8> @get_v1i8() +declare <1 x i16> @get_v1i16() +declare <1 x i32> @get_v1i32() +declare <1 x i64> @get_v1i64() +declare <16 x i8> @get_v16i8() +declare <8 x i16> @get_v8i16() +declare <4 x i32> @get_v4i32() +declare <2 x i64> @get_v2i64() + +declare void @use_f16(half) +declare void @use_f32(float) +declare void @use_f64(double) +declare void @use_v1f16(<1 x half>) +declare void @use_v1f32(<1 x float>) +declare void @use_v1f64(<1 x double>) +declare void @use_v8f16(<8 x half>) +declare void @use_v8bf16(<8 x bfloat>) +declare void @use_v4f32(<4 x float>) +declare void @use_v2f64(<2 x double>) + +declare void @use_i8(i8) +declare void @use_i16(i16) +declare void @use_i32(i32) +declare void @use_i64(i64) +declare void @use_v1i8(<1 x i8>) +declare void @use_v1i16(<1 x i16>) +declare void @use_v1i32(<1 x i32>) +declare void @use_v1i64(<1 x i64>) +declare void @use_v16i8(<16 x i8>) +declare void @use_v8i16(<8 x i16>) +declare void @use_v4i32(<4 x i32>) +declare void @use_v2i64(<2 x i64>) +declare void @use_v8i1(<8 x i1>) + +declare <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.v8i1(<vscale x 8 x i1>, <8 x i1>, i64) +declare
<vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64) +declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64) +declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64) +declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64) +declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64) +declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64) +declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64) +declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64) + +attributes #0 = { nounwind "aarch64_pstate_sm_enabled" "target-features"="+sve,+sme" } diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll index 47d465cc320b5c491f04d8855006f424ca0e019c..49d7ae006bb1cf5d6da18969c9bee7595529187b 100644 --- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll @@ -4,20 +4,21 @@ declare void @private_za_callee() ; Ensure that we don't use tail call optimization when a lazy-save is required. -define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind { +define void @disable_tailcallopt() "aarch64_inout_za" nounwind { ; CHECK-LABEL: disable_tailcallopt: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mul x8, x8, x8 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msub x8, x9, x9, x8 +; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: stur x9, [x29, #-16] -; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: stur wzr, [x29, #-4] +; CHECK-NEXT: sturh wzr, [x29, #-6] +; CHECK-NEXT: stur x8, [x29, #-16] +; CHECK-NEXT: sturh w9, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za @@ -36,20 +37,21 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind { } ; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls -define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind { +define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind { ; CHECK-LABEL: f128_call_za: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]!
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mul x8, x8, x8 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msub x8, x9, x9, x8 +; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: stur x9, [x29, #-16] -; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: stur wzr, [x29, #-4] +; CHECK-NEXT: sturh wzr, [x29, #-6] +; CHECK-NEXT: stur x8, [x29, #-16] +; CHECK-NEXT: sturh w9, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl __addtf3 ; CHECK-NEXT: smstart za diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll new file mode 100644 index 0000000000000000000000000000000000000000..a50e44892c614416f083ee0c6ed17818d2ca9ac1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s + +declare void @normal_callee(); +declare void @streaming_callee() "aarch64_pstate_sm_enabled"; +declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; + +define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: sm_body_sm_compatible_simple: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x8, x0, #0x1 +; CHECK-NEXT: tbnz w8, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: fmov s0, wzr +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: tbnz w8, #0, .LBB0_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + ret float zeroinitializer +} + +define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbnz w19, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: tbnz w19, #0, .LBB1_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret + call void @normal_callee() + ret void +} + +; Function Attrs: nounwind uwtable vscale_range(1,16) +define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbnz w19, #0, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB2_2: // %entry +; CHECK-NEXT: cbz w8, .LBB2_6 +; CHECK-NEXT: // %bb.3: // %if.else +; CHECK-NEXT: bl streaming_compatible_callee +; CHECK-NEXT: tbnz w19, #0, .LBB2_5 +; CHECK-NEXT: // %bb.4: // %if.else +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB2_5: // %if.else +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_6: // %if.then +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: tbnz w19, #0, .LBB2_8 +; CHECK-NEXT: // %bb.7: // %if.then +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB2_8: // %if.then +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void @normal_callee() + br label %return + +if.else: ; preds = %entry + tail call void @streaming_compatible_callee() + br label %return + +return: ; preds = %if.else, %if.then + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll index 
7561e75d04efa0f9427682d14fd618b7b77cc6a9..384e7dc94f785f2d77281cccbc0d9d3edc2977ae 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s declare void @normal_callee(); declare void @streaming_callee() "aarch64_pstate_sm_enabled"; @@ -237,25 +237,31 @@ declare void @use_ptr(ptr) "aarch64_pstate_sm_compatible" define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_body" { ; CHECK-LABEL: call_to_intrinsic_without_chain: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl cos -; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret entry: %0 = call fast double @llvm.cos.f64(double %x) @@ -306,3 +312,24 @@ for.cond.cleanup: ret float %add } + +define void @disable_tailcallopt() "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: disable_tailcallopt: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: bl streaming_compatible_callee +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret + tail call void @streaming_compatible_callee(); + ret void; +} diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index 214a5ce38f276bbb22e24ce46723a619b239c7f9..d1c542bb1c5ce696199aa033e630731da99db35b 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -43,12 +43,12 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbz x19, #0, .LBB1_2 +; CHECK-NEXT: tbz w19, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: bl normal_callee -; CHECK-NEXT: tbz x19, #0, .LBB1_4 +; CHECK-NEXT: tbz w19, #0, .LBB1_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB1_4: @@ -79,12 +79,12 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbnz x19, #0, .LBB2_2 +; CHECK-NEXT: tbnz w19, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: bl streaming_callee -; CHECK-NEXT: tbnz x19, #0, .LBB2_4 +; CHECK-NEXT: tbnz w19, #0, .LBB2_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB2_4: @@ -129,30 +129,37 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbz x19, #0, .LBB4_2 +; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: tbz w19, #0, .LBB4_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl normal_callee_vec_arg -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: tbz x19, #0, 
.LBB4_4 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: tbz w19, #0, .LBB4_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload @@ -204,14 +211,14 @@ define @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_predicate_vectors( @streaming_compatible_with_predicate_vectors( @streaming_compatible_with_predicate_vectors(&1 | FileCheck %s + +declare void @normal_callee() +declare void @streaming_callee() "aarch64_pstate_sm_enabled" +declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" + +; CHECK: remark: <unknown>:0:0: call from 'normal_caller_streaming_callee' to 'streaming_callee' requires a streaming mode transition +define void @normal_caller_streaming_callee() nounwind { + call void @streaming_callee() + ret void; +} + +; CHECK: remark: <unknown>:0:0: call from 'streaming_caller_normal_callee' to 'normal_callee' requires a streaming mode transition +define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @normal_callee() + ret void; +} + +; CHECK-NOT: streaming_caller_streaming_callee +define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @streaming_callee() + ret void; +} + +; CHECK-NOT: streaming_caller_streaming_compatible_callee +define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @streaming_compatible_callee() + ret void; +} + +; CHECK: remark: <unknown>:0:0: call from 'call_to_function_pointer_streaming_enabled' to 'unknown callee' requires a streaming mode transition +define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind { + call void %p() "aarch64_pstate_sm_enabled" + ret void +} + +; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_simdfp' to 'streaming_callee' requires a streaming mode transition +define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind { + call void @streaming_callee() + ret <4 x i32> %x; +} + +; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve' to 'streaming_callee' requires a streaming mode transition +define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind { + call void @streaming_callee() + ret <vscale x 4 x i32> %x; +} + +; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition +; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition +define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind { + call void @streaming_callee() + call void @streaming_callee() + ret <vscale x 4 x i32> %x; +} + +; CHECK: remark: <unknown>:0:0: call from 'call_to_intrinsic_without_chain' to 'cos' requires a streaming mode transition +define double @call_to_intrinsic_without_chain(double %x) nounwind
"aarch64_pstate_sm_enabled" { +entry: + %res = call fast double @llvm.cos.f64(double %x) + %res.fadd = fadd fast double %res, %x + ret double %res.fadd +} + +declare double @llvm.cos.f64(double) + +; CHECK: remark: :0:0: call from 'disable_tailcallopt' to 'streaming_callee' requires a streaming mode transition +define void @disable_tailcallopt() nounwind { + tail call void @streaming_callee() + ret void; +} + +; CHECK: remark: :0:0: call from 'call_to_non_streaming_pass_sve_objects' to 'foo' requires a streaming mode transition +define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 { +entry: + %Data1 = alloca , align 16 + %Data2 = alloca , align 16 + %Data3 = alloca , align 16 + %0 = tail call i64 @llvm.aarch64.sme.cntsb() + call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0) + %1 = load , ptr %Data1, align 16 + %vecext = extractelement %1, i64 0 + ret i8 %vecext +} + +declare i64 @llvm.aarch64.sme.cntsb() + +declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) + +attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" } diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll index 2f4c907c8724777e70f4e541b9edb735ea560532..9270a6392c41074e54dfe763622c0cb0808e0542 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s ; This file tests the following combinations related to streaming-enabled functions: ; [ ] N -> S (Normal -> Streaming) @@ -187,18 +187,6 @@ define @smstart_clobber_sve( %x) #0 { ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -215,6 +203,18 @@ define @smstart_clobber_sve( %x) #0 { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded 
Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -267,18 +267,6 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -295,6 +283,18 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -313,20 +313,19 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-NEXT: stp d0, d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp] // 8-byte Folded Reload ; CHECK-NEXT: bl cos -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; 
CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload -; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret entry: @@ -369,15 +368,11 @@ define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: addvl x9, sp, #2 -; CHECK-NEXT: addvl x10, sp, #1 -; CHECK-NEXT: mov x11, sp +; CHECK-NEXT: rdsvl x3, #1 +; CHECK-NEXT: addvl x0, sp, #2 +; CHECK-NEXT: addvl x1, sp, #1 +; CHECK-NEXT: mov x2, sp ; CHECK-NEXT: smstop sm -; CHECK-NEXT: mov x0, x9 -; CHECK-NEXT: mov x1, x10 -; CHECK-NEXT: mov x2, x11 -; CHECK-NEXT: mov x3, x8 ; CHECK-NEXT: bl foo ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b @@ -401,9 +396,38 @@ entry: ret i8 %vecext } +define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) #1 { +; CHECK-LABEL: call_to_non_streaming_pass_args: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: bl bar +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: ret +entry: + call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) + ret void +} + declare i64 @llvm.aarch64.sme.cntsb() declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) +declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef) attributes #0 = { nounwind "target-features"="+sve" } attributes #1 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" } diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll new file mode 100644 index 
0000000000000000000000000000000000000000..0e6de90289e90a7aae49182c916d094fe628079f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64" + +; This function would normally scavenge a stackslot from the callee-save +; area, which would lead to spilling 's0' to that stackslot before the +; smstop and filling it with 'addvl + <offset>' after the smstop because +; the frame-pointer is not available. +; This would not be valid, since the vector-length has changed so 'addvl' +; cannot be used. This is testing that the stackslot-scavenging is disabled +; when there are any streaming-mode-changing call-sequences in the +; function. +define void @test_no_stackslot_scavenging(float %f) #0 { +; CHECK-LABEL: test_no_stackslot_scavenging: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: bl use_f +; CHECK-NEXT: smstart sm +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ldp x30, x24, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ret + %ptr = alloca <vscale x 16 x i8> + call void asm sideeffect "", "~{x24}"() nounwind + call void @use_f(float %f) + ret void +} + +declare void @use_f(float) + +attributes #0 = { nounwind "target-features"="+sme" "aarch64_pstate_sm_enabled" } diff --git a/llvm/test/CodeGen/AArch64/sme-write-vg.ll b/llvm/test/CodeGen/AArch64/sme-write-vg.ll new file mode 100644 index 0000000000000000000000000000000000000000..577606d45484f13b0a72c11f72f03c956184be4d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-write-vg.ll @@ -0,0 +1,24 @@ +; RUN: llc -mattr=+sme -stop-after=finalize-isel < %s | FileCheck %s + +target triple = "aarch64" + +; Check that we don't define VG for 'smstart za' and 'smstop za' +define void @smstart_za() "aarch64_new_za" nounwind { + ; CHECK-LABEL: name: smstart_za + ; CHECK-NOT: implicit-def {{[^,]*}}$vg + ret void +} + +; Check that we do define VG for 'smstart sm' and 'smstop sm' +define void @smstart_sm() nounwind { + ; CHECK-LABEL: name: smstart_sm + ; CHECK: MSRpstatesvcrImm1 1, 1, + ; CHECK-SAME: implicit-def {{[^,]*}}$vg + ; CHECK: MSRpstatesvcrImm1 1, 0, + ; CHECK-SAME: implicit-def {{[^,]*}}$vg + call void @require_sm() + ret void +} + +declare void @require_sm() "aarch64_pstate_sm_enabled" +declare void @require_za() "aarch64_inout_za" diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir index
951dbc72defc82a0c9f7bb03528281250f612018..08d55f07b7c4bf89dcbde14d0d635e0f071880b6 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -7,12 +7,14 @@ target triple = "aarch64--linux-gnu" define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_pnr() #1 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable } attributes #0 = { nounwind "target-features"="+sve" } + attributes #1 = { nounwind "target-features"="+sve2p1" } ... --- @@ -59,6 +61,49 @@ body: | RET_ReallyLR ... --- +name: spills_fills_stack_id_pnr +tracksRegLiveness: true +registers: + - { id: 0, class: pnr } +stack: +liveins: + - { reg: '$pn0', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $pn0 + + ; CHECK-LABEL: name: spills_fills_stack_id_pnr + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 2, alignment: 2 + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '' + + ; EXPAND-LABEL: name: spills_fills_stack_id_pnr + ; EXPAND: STR_PXI $p0, $sp, 7 + ; EXPAND: $p0 = LDR_PXI $sp, 7, implicit-def $pn0 + + %0:pnr = COPY $pn0 + + $pn0 = IMPLICIT_DEF + $pn1 = IMPLICIT_DEF + $pn2 = IMPLICIT_DEF + $pn3 = IMPLICIT_DEF + $pn4 = IMPLICIT_DEF + $pn5 = IMPLICIT_DEF + $pn6 = IMPLICIT_DEF + $pn7 = IMPLICIT_DEF + $pn8 = IMPLICIT_DEF + $pn9 = IMPLICIT_DEF + $pn10 = IMPLICIT_DEF + $pn11 = IMPLICIT_DEF + $pn12 = IMPLICIT_DEF + $pn13 = IMPLICIT_DEF + $pn14 = IMPLICIT_DEF + $pn15 = IMPLICIT_DEF + + $pn0 = COPY %0 + RET_ReallyLR +... 
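+# A minimal sketch of the spill/fill pattern exercised above, assuming the
+# same pnr register class and STR_PXI/LDR_PXI expansion shown in the CHECK
+# and EXPAND lines: keeping one PNR value live while $pn0-$pn15 are all
+# clobbered leaves the allocator no predicate register, so the value must
+# round-trip through a 2-byte scalable-vector spill slot.
+#
+#   %0:pnr = COPY $pn0    # value kept live across the clobbers
+#   $pn0 = IMPLICIT_DEF   # ...likewise $pn1 through $pn15...
+#   $pn0 = COPY %0        # filled back from the scalable spill slot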
+--- name: spills_fills_stack_id_zpr tracksRegLiveness: true registers: diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll index 90eed07c242bf6833b883b0b3672fb4bb03f76db..565311d8e3f8fe456d626a990d29a3bbed395fd9 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -66,18 +66,6 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: bl bar ; CHECK-NEXT: addvl sp, x29, #-18 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -94,6 +82,18 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index a97649523565dc0f3fdb7053ee49cbdb5b68f68d..c551fcf4da70dc3160c32277f793114406dbca69 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -567,18 +567,6 @@ define @sve_caller_non_sve_callee_high_range( @sve_caller_non_sve_callee_high_range( @sve_ret_caller_non_sve_callee_high_range() { ; CHECK-NEXT: addvl x0, sp, #1 ; CHECK-NEXT: bl non_sve_callee_high_range ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr 
p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -687,6 +675,18 @@ define @sve_ret_caller_non_sve_callee_high_range() { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir index 3fbb7889c8b79b83b9162388f7299e017722583d..6063c8dfc792c955fec2d147a4c8f5a7dbaabed1 100644 --- a/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir +++ b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir @@ -48,7 +48,7 @@ body: | %2:gpr32 = COPY $w0 %1:zpr = COPY $z1 %0:zpr = COPY $z0 - %5:ppr_3b = PTRUE_B 31 + %5:ppr_3b = PTRUE_B 31, implicit $vg %6:gpr64sp = ADDXri %stack.0, 0, 0 ST1B_IMM %1, %5, %6, 1 :: (store unknown-size, align 16) ST1B_IMM %0, %5, %stack.0, 0 :: (store unknown-size into %stack.0, align 16) diff --git a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir index b76fe7821b6c691a410e76648105f6499819084c..8395a7619fbb467bc9a52ddcd48933d52d7128ed 100644 --- a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir +++ b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir @@ -11,15 +11,15 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ppr = COPY $p0 - ; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE + ; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE implicit $vg ; CHECK-NEXT: [[UZP1_PPP_B:%[0-9]+]]:ppr = UZP1_PPP_B [[COPY]], [[PFALSE]] ; CHECK-NEXT: [[UZP1_PPP_B1:%[0-9]+]]:ppr = UZP1_PPP_B killed [[UZP1_PPP_B]], [[PFALSE]] ; CHECK-NEXT: $p0 = COPY [[UZP1_PPP_B1]] ; CHECK-NEXT: RET_ReallyLR implicit $p0 %0:ppr = COPY $p0 - %2:ppr = PFALSE + %2:ppr = PFALSE implicit $vg %3:ppr = UZP1_PPP_B %0, %2 - %4:ppr = PFALSE + %4:ppr = PFALSE implicit $vg %5:ppr = UZP1_PPP_B killed %3, %4 $p0 = COPY %5 RET_ReallyLR implicit $p0 diff --git 
a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir index df0e50de4d1a7a8523c0e73c33160415f9fe089b..ae70f91a4ec6414c25f78c513fd09e671d6254a5 100644 --- a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir +++ b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir @@ -26,7 +26,7 @@ body: | name: expand_mls_to_msb body: | bb.0: - renamable $p0 = PTRUE_B 31 + renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... @@ -36,7 +36,7 @@ body: | name: expand_mla_to_mad body: | bb.0: - renamable $p0 = PTRUE_B 31 + renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir index 81318aa5c2a58da84c29221bbb44f08096d9c0ee..5169113697dc603c3f8417f1fe7130b81631df51 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -174,7 +174,7 @@ body: | %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_B 31 + %3:ppr = PTRUE_B 31, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv @@ -409,14 +409,14 @@ body: | ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_not_all_active ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - ; CHECK-NEXT: %3:ppr = PTRUE_B 0 + ; CHECK-NEXT: %3:ppr = PTRUE_B 0, implicit $vg ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv ; CHECK-NEXT: %4:gpr32 = COPY $wzr ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_B 0 + %3:ppr = PTRUE_B 0, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv @@ -446,14 +446,14 @@ body: | ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_of_halfs ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - ; CHECK-NEXT: %3:ppr = PTRUE_H 31 + ; CHECK-NEXT: %3:ppr = PTRUE_H 31, implicit $vg ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv ; CHECK-NEXT: %4:gpr32 = COPY $wzr ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_H 31 + %3:ppr = PTRUE_H 31, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir index 8f7467d99154e3285d3b1dee155aa32889737bfe..c1d9dfff73447ce19e91785eafaeda48bd6ad740 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 
+63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGE_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGE_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGE_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGE_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGE_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGE_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir index 217d984560e36c472cf64e848c701290e49b0380..c6df21f85db77354f3f924059e16972d31302a86 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr =
PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGT_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGT_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGT_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGT_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGT_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 1 + %2:ppr = PTRUE_H 1, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir index
8d6f466c6b735d1c0ae6b25d1283aaa9bd1ef6c7..7d8aed3c325a01863c90b48f5fd245eeabdbc048 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHI_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHI_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHI_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHI_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHI_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 29 + %2:ppr = PTRUE_S 29, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, 
implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir index da76a30f843b7ae15ead69a1874387467efcb4b5..f4dbfbc3db1cab61218c155c28455af1f51f0634 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHS_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHS_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHS_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHS_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 30 + %2:ppr = PTRUE_D 30, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 
%0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir index 32954d593c1ddd21eb2dc180684f9707d455c608..dc2265490cb55e4c81c2c92937e798407393728e 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELE_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELE_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELE_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELE_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELE_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELE_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 7 + %2:ppr = PTRUE_B 7, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed 
%2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir index cca0ab8ef210b99577dc1bdd2f040bed94b0fefe..4d66e3e57da8b39390ef70b1ffebf06ec7db543b 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELO_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELO_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELO_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELO_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELO_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELO_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELO_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 6 + %2:ppr = PTRUE_H 6, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv 
%1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir index 4bae3a1986f451b05556ceeb4e57d80f22a1fc4f..ea02f8c70ef86c68d3dfed7f140650636b28f78a 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELS_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELS_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELS_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELS_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 5 + %2:ppr = PTRUE_S 5, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead 
$nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir index 3c6a9e21b4c6c15afe88f29b0b4102507abdbb97..d08781f203e328043c7eaf7080c954e5c9a0c841 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELT_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELT_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELT_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELT_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 4 + %2:ppr = PTRUE_D 4, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, 
implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir index 27cdf593df776f6635a01d17b74fdfbbf6d49074..d800009b9537f3d79bd1f4b0672161f8eecfec92 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -65,7 +65,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILERW_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -100,7 +100,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILERW_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -135,7 +135,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILERW_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -175,7 +175,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -215,7 +215,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -255,7 +255,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -295,7 +295,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git 
a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir index 3b49b1ec2c804578d7bf18990e8dbb8524954493..9f8b7c3197ecf2286653a0894a48c34abb1f032e 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -65,7 +65,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEWR_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -100,7 +100,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEWR_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -135,7 +135,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEWR_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -175,7 +175,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -215,7 +215,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -255,7 +255,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -295,7 +295,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index 0bd767cd43655794686e1e85edb5e990e35d8f10..4ae4e6538703caebbf68546b9a52633632d448e6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -1122,3 +1122,201 @@ define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) { store <4 x double> %res, ptr %b ret void } + +define half @scvtf_i16_f16(ptr %0) { +; CHECK-LABEL: scvtf_i16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrsh w8, [x0] +; CHECK-NEXT: scvtf h0, w8 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = sitofp i16 %2 to half + ret half %3 +} + +define float @scvtf_i16_f32(ptr %0) { +; CHECK-LABEL: scvtf_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrsh w8, 
[x0] +; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = sitofp i16 %2 to float + ret float %3 +} + +define double @scvtf_i16_f64(ptr %0) { +; CHECK-LABEL: scvtf_i16_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrsh w8, [x0] +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = sitofp i16 %2 to double + ret double %3 +} + +define half @scvtf_i32_f16(ptr %0) { +; CHECK-LABEL: scvtf_i32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: scvtf h0, w8 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = sitofp i32 %2 to half + ret half %3 +} + +define float @scvtf_i32_f32(ptr %0) { +; CHECK-LABEL: scvtf_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = sitofp i32 %2 to float + ret float %3 +} + +define double @scvtf_i32_f64(ptr %0) { +; CHECK-LABEL: scvtf_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = sitofp i32 %2 to double + ret double %3 +} + +define half @scvtf_i64_f16(ptr %0) { +; CHECK-LABEL: scvtf_i64_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: scvtf h0, x8 +; CHECK-NEXT: ret + %2 = load i64, ptr %0, align 64 + %3 = sitofp i64 %2 to half + ret half %3 +} + +define float @scvtf_i64_f32(ptr %0) { +; CHECK-LABEL: scvtf_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: scvtf s0, x8 +; CHECK-NEXT: ret + %2 = load i64, ptr %0, align 64 + %3 = sitofp i64 %2 to float + ret float %3 +} + +define double @scvtf_i64_f64(ptr %0) { +; CHECK-LABEL: scvtf_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ret + %2 = load i64, ptr %0, align 64 + %3 = sitofp i64 %2 to double + ret double %3 +} + +define half @ucvtf_i16_f16(ptr %0) { +; CHECK-LABEL: ucvtf_i16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ucvtf h0, w8 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = uitofp i16 %2 to half + ret half %3 +} + +define float @ucvtf_i16_f32(ptr %0) { +; CHECK-LABEL: ucvtf_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ucvtf s0, s0 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = uitofp i16 %2 to float + ret float %3 +} + +define double @ucvtf_i16_f64(ptr %0) { +; CHECK-LABEL: ucvtf_i16_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ucvtf d0, d0 +; CHECK-NEXT: ret + %2 = load i16, ptr %0, align 64 + %3 = uitofp i16 %2 to double + ret double %3 +} + +define half @ucvtf_i32_f16(ptr %0) { +; CHECK-LABEL: ucvtf_i32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ucvtf h0, w8 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = uitofp i32 %2 to half + ret half %3 +} + +define float @ucvtf_i32_f32(ptr %0) { +; CHECK-LABEL: ucvtf_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = uitofp i32 %2 to float + ret float %3 +} + +define double @ucvtf_i32_f64(ptr %0) { +; CHECK-LABEL: ucvtf_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ucvtf d0, d0 +; CHECK-NEXT: ret + %2 = load i32, ptr %0, align 64 + %3 = uitofp i32 %2 to double + ret double %3 +} + +define half @ucvtf_i64_f16(ptr %0) { +; CHECK-LABEL: ucvtf_i64_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ucvtf h0, x8 +; CHECK-NEXT: ret + %2 = load 
i64, ptr %0, align 64 + %3 = uitofp i64 %2 to half + ret half %3 +} + +define float @ucvtf_i64_f32(ptr %0) { +; CHECK-LABEL: ucvtf_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ucvtf s0, x8 +; CHECK-NEXT: ret + %2 = load i64, ptr %0, align 64 + %3 = uitofp i64 %2 to float + ret float %3 +} + +define double @ucvtf_i64_f64(ptr %0) { +; CHECK-LABEL: ucvtf_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ret + %2 = load i64, ptr %0, align 64 + %3 = uitofp i64 %2 to double + ret double %3 +} diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index e746770e29a2f5105ac4edec3f65871b2e022680..050c7353b97d9129223e9666491906982549555b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -335,6 +335,58 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) { ret <4 x double> %load } +define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) { +; CHECK-LABEL: masked_load_zext_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: strh w3, [sp, #12] +; CHECK-NEXT: strh w2, [sp, #10] +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: strh w1, [sp, #8] +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) + %extend = zext <3 x i16> %load_value to <3 x i32> + ret <3 x i32> %extend; +} + +define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) { +; CHECK-LABEL: masked_load_sext_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: strh w3, [sp, #12] +; CHECK-NEXT: strh w2, [sp, #10] +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: strh w1, [sp, #8] +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: asr z0.h, z0.h, #15 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) + %extend = sext <3 x i16> %load_value to <3 x i32> + ret <3 x i32> %extend; +} + declare <4 x i8> @llvm.masked.load.v4i8(ptr, i32, <4 x i1>, <4 x i8>) declare <8 x i8> @llvm.masked.load.v8i8(ptr, i32, <8 x i1>, <8 x i8>) declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>) @@ -351,3 +403,5 @@ declare <8 x float> @llvm.masked.load.v8f32(ptr, i32, <8 x i1>, <8 x float>) declare <2 x double> @llvm.masked.load.v2f64(ptr, i32, <2 x i1>, <2 x double>) declare <4 x double> @llvm.masked.load.v4f64(ptr, i32, <4 x i1>, <4 x double>) + +declare <3 x i16> 
@llvm.masked.load.v3i16.p0(ptr, i32, <3 x i1>, <3 x i16>) diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll b/llvm/test/CodeGen/AArch64/sve-tailcall.ll index 58135e44fa912dbf2861f062604ddc2a1e15d694..4ddf007768fd2c0a04e58ee3715ace0d7156df89 100644 --- a/llvm/test/CodeGen/AArch64/sve-tailcall.ll +++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll @@ -83,18 +83,6 @@ define i32 @sve_caller_non_sve_callee( %arg) nounwind { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: bl non_sve_callee -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -111,6 +99,18 @@ define i32 @sve_caller_non_sve_callee( %arg) nounwind { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -158,18 +158,6 @@ define i32 @sve_caller_non_sve_callee_fastcc( %arg) nounwind { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: bl non_sve_callee -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] 
// 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -186,6 +174,18 @@ define i32 @sve_caller_non_sve_callee_fastcc( %arg) nounwind { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll index f3c4d217e6fcaa1d060f4579b5857426d2a34858..822be14faaeb1f83e51e89926ebbf0d892b63a9c 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -63,18 +63,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -91,6 +79,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload 
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -112,18 +112,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -140,6 +128,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -215,18 +215,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 
2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -243,6 +231,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #18 ; GISEL-NEXT: .cfi_def_cfa wsp, 16 ; GISEL-NEXT: .cfi_restore z8 @@ -264,18 +264,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -292,6 +280,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, 
#7, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; GISEL-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; GISEL-NEXT:    addvl sp, sp, #18
 ; GISEL-NEXT:    .cfi_def_cfa wsp, 16
 ; GISEL-NEXT:    .cfi_restore z8
diff --git a/llvm/test/MC/AArch64/SME/directives-negative.s b/llvm/test/MC/AArch64/SME/directives-negative.s
index 3df90b3016869ee39ef24f0545daa6cf04ef8b8f..123c3a383d71efc31e43c8e6594c8cbdc089b622 100644
--- a/llvm/test/MC/AArch64/SME/directives-negative.s
+++ b/llvm/test/MC/AArch64/SME/directives-negative.s
@@ -2,9 +2,9 @@
 .arch_extension sme
 .arch_extension nosme
-smstart
+zero {za}
 // CHECK: error: instruction requires: sme
-// CHECK-NEXT: smstart
+// CHECK-NEXT: zero {za}
 
 .arch_extension sme-f64f64
 .arch_extension nosme-f64f64
@@ -20,9 +20,9 @@ addha za0.d, p0/m, p0/m, z0.d
 .arch armv9-a+sme
 .arch armv9-a+nosme
-smstart
+zero {za}
 // CHECK: error: instruction requires: sme
-// CHECK-NEXT: smstart
+// CHECK-NEXT: zero {za}
 
 .arch armv9-a+sme-f64f64
 .arch armv9-a+nosme-f64f64
diff --git a/llvm/test/MC/AArch64/SME/smstart.s b/llvm/test/MC/AArch64/SME/smstart.s
index 1628a279bb3535d01936c55ab067139f928f2c5c..07a696d28df8c8185cf0317e4d0f9c66221aa6ac 100644
--- a/llvm/test/MC/AArch64/SME/smstart.s
+++ b/llvm/test/MC/AArch64/SME/smstart.s
@@ -1,38 +1,28 @@
 // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
 // RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
 // RUN:   | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
-// RUN:   | llvm-objdump -d --mattr=-sme - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN:   | llvm-objdump -d --mattr=-sme - | FileCheck %s --check-prefix=CHECK-INST
 
 smstart
 // CHECK-INST: smstart
 // CHECK-ENCODING: [0x7f,0x47,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503477f msr S0_3_C4_C7_3, xzr
 
 smstart sm
 // CHECK-INST: smstart sm
 // CHECK-ENCODING: [0x7f,0x43,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503437f msr S0_3_C4_C3_3, xzr
 
 smstart za
 // CHECK-INST: smstart za
 // CHECK-ENCODING: [0x7f,0x45,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503457f msr S0_3_C4_C5_3, xzr
 
 smstart SM
 // CHECK-INST: smstart sm
 // CHECK-ENCODING: [0x7f,0x43,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503437f msr S0_3_C4_C3_3, xzr
 
 smstart ZA
 // CHECK-INST: smstart za
 // CHECK-ENCODING: [0x7f,0x45,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503457f msr S0_3_C4_C5_3, xzr
diff --git a/llvm/test/MC/AArch64/SME/smstop.s b/llvm/test/MC/AArch64/SME/smstop.s
index b7c21d939e9a9db44c865f0bcc141f64ca9ad10c..df9d1118d96ffffbc3ae91398558986dbefe9d12 100644
--- a/llvm/test/MC/AArch64/SME/smstop.s
+++ b/llvm/test/MC/AArch64/SME/smstop.s
@@ -1,38 +1,28 @@
 // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
 // RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
 // RUN:   | llvm-objdump -d --mattr=+sme - | FileCheck %s --check-prefix=CHECK-INST
 // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme < %s \
-// RUN:   | llvm-objdump -d --mattr=-sme - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN:   | llvm-objdump -d --mattr=-sme - | FileCheck %s --check-prefix=CHECK-INST
 
 smstop
 // CHECK-INST: smstop
 // CHECK-ENCODING: [0x7f,0x46,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503467f msr S0_3_C4_C6_3, xzr
 
 smstop sm
 // CHECK-INST: smstop sm
 // CHECK-ENCODING: [0x7f,0x42,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503427f msr S0_3_C4_C2_3, xzr
 
 smstop za
 // CHECK-INST: smstop za
 // CHECK-ENCODING: [0x7f,0x44,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503447f msr S0_3_C4_C4_3, xzr
 
 smstop SM
 // CHECK-INST: smstop sm
 // CHECK-ENCODING: [0x7f,0x42,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503427f msr S0_3_C4_C2_3, xzr
 
 smstop ZA
 // CHECK-INST: smstop za
 // CHECK-ENCODING: [0x7f,0x44,0x03,0xd5]
-// CHECK-ERROR: instruction requires: sme
-// CHECK-UNKNOWN: d503447f msr S0_3_C4_C4_3, xzr
diff --git a/llvm/test/MC/AArch64/SME/system-regs.s b/llvm/test/MC/AArch64/SME/system-regs.s
index 48033d026c44bde5b28693fde41de3b22a049bf8..093b64a70fb9316f47b09d40dfb5fe9bf5293466 100644
--- a/llvm/test/MC/AArch64/SME/system-regs.s
+++ b/llvm/test/MC/AArch64/SME/system-regs.s
@@ -119,37 +119,37 @@ msr SVCRSM, #0
 // CHECK-INST: smstop sm
 // CHECK-ENCODING: [0x7f,0x42,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503427f msr S0_3_C4_C2_3, xzr
+// CHECK-UNKNOWN: d503427f smstop sm
 
 msr SVCRSM, #1
 // CHECK-INST: smstart
 // CHECK-ENCODING: [0x7f,0x43,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503437f msr S0_3_C4_C3_3, xzr
+// CHECK-UNKNOWN: d503437f smstart
 
 msr SVCRZA, #0
 // CHECK-INST: smstop za
 // CHECK-ENCODING: [0x7f,0x44,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503447f msr S0_3_C4_C4_3, xzr
+// CHECK-UNKNOWN: d503447f smstop za
 
 msr SVCRZA, #1
 // CHECK-INST: smstart za
 // CHECK-ENCODING: [0x7f,0x45,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503457f msr S0_3_C4_C5_3, xzr
+// CHECK-UNKNOWN: d503457f smstart za
 
 msr SVCRSMZA, #0
 // CHECK-INST: smstop
 // CHECK-ENCODING: [0x7f,0x46,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503467f msr S0_3_C4_C6_3, xzr
+// CHECK-UNKNOWN: d503467f smstop
 
 msr SVCRSMZA, #1
 // CHECK-INST: smstart
 // CHECK-ENCODING: [0x7f,0x47,0x03,0xd5]
 // CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d503477f msr S0_3_C4_C7_3, xzr
+// CHECK-UNKNOWN: d503477f smstart
 
 msr TPIDR2_EL0, x3
 // CHECK-INST: msr TPIDR2_EL0, x3
diff --git a/llvm/test/MC/AArch64/SVE/cntp-diagnostics.s b/llvm/test/MC/AArch64/SVE/cntp-diagnostics.s
index 9c4173f50dee0193eff34676d313edc4d9578c0f..99b38289ade736f1088745c6ffd3486fa1646485 100644
--- a/llvm/test/MC/AArch64/SVE/cntp-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/cntp-diagnostics.s
@@ -1,7 +1,7 @@
 // RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve 2>&1 < %s| FileCheck %s
 
 cntp sp
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: cntp sp
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s
index bca2cf913ff64a2818ed1e1e3e2e184675236417..fd6c59888e097d3d5085907f3e2863fcdd9d7887 100644
--- a/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s
+++ b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s
@@ -1,50 +1,50 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
 // RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \
 // RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \
-// RUN:   | llvm-objdump --no-print-imm-hex -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:   | llvm-objdump --no-print-imm-hex -d --mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
 // RUN:   | llvm-objdump --no-print-imm-hex -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 ldr pn0, [x0]
 // CHECK-INST: ldr p0, [x0]
 // CHECK-ENCODING: [0x00,0x00,0x80,0x85]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: 85800000
 
 ldr pn5, [x10, #255, mul vl]
 // CHECK-INST: ldr p5, [x10, #255, mul vl]
 // CHECK-ENCODING: [0x45,0x1d,0x9f,0x85]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: 859f1d45
 
 str pn0, [x0]
 // CHECK-INST: str p0, [x0]
 // CHECK-ENCODING: [0x00,0x00,0x80,0xe5]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e5800000
 
 str pn5, [x10, #255, mul vl]
 // CHECK-INST: str p5, [x10, #255, mul vl]
 // CHECK-ENCODING: [0x45,0x1d,0x9f,0xe5]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: e59f1d45
 
 mov pn0.b, pn0.b
 // CHECK-INST: mov p0.b, p0.b
 // CHECK-ENCODING: [0x00,0x40,0x80,0x25]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: 25804000
 
 pfalse pn15.b
 // CHECK-INST: pfalse p15.b
 // CHECK-ENCODING: [0x0f,0xe4,0x18,0x25]
-// CHECK-ERROR: instruction requires: sme2 or sve2p1
+// CHECK-ERROR: instruction requires: sve or sme
 // CHECK-UNKNOWN: 2518e40f
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 4e7074ec75dd63ee5263734090056d588be37f71..580fabfd00cb449ad3124c27b09778ebd145f9f4 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -1,45 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline | FileCheck %s
 
-declare void @inlined_body() "aarch64_pstate_sm_compatible";
+declare i32 @llvm.vscale.i32()
 
-;
-; Define some functions that will be called by the functions below.
-; These just call a '...body()' function. If we see the call to one of
-; these functions being replaced by '...body()', then we know it has been
-; inlined.
-;
+; Define some functions that merely call llvm.vscale.i32(), which will be called
+; by the other functions below. If we see the call to one of these functions
+; being replaced by 'llvm.vscale()', then we know it has been inlined.
 
-define void @normal_callee() {
+define i32 @normal_callee() {
+; CHECK-LABEL: define i32 @normal_callee
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @inlined_body()
-  ret void
+  %res = call i32 @llvm.vscale.i32()
+  ret i32 %res
 }
 
-define void @streaming_callee() "aarch64_pstate_sm_enabled" {
+define i32 @streaming_callee() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_callee
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @inlined_body()
-  ret void
+  %res = call i32 @llvm.vscale.i32()
+  ret i32 %res
 }
 
-define void @locally_streaming_callee() "aarch64_pstate_sm_body" {
+define i32 @locally_streaming_callee() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_callee
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @inlined_body()
-  ret void
+  %res = call i32 @llvm.vscale.i32()
+  ret i32 %res
 }
 
-define void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" {
+define i32 @streaming_compatible_callee() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_callee
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @inlined_body()
-  ret void
+  %res = call i32 @llvm.vscale.i32()
+  ret i32 %res
 }
 
-define void @streaming_compatible_locally_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+declare i32 @llvm.vscale()
+
+define i32 @streaming_compatible_locally_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_callee
+; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @inlined_body()
-  ret void
+  %res = call i32 @llvm.vscale()
+  ret i32 %res
 }
 
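;
; (Illustration, not part of the test: with the scheme above, a successful
; inline shows up in the FileCheck output as the intrinsic call appearing
; directly in the caller. A minimal sketch, assuming the same -passes=inline
; pipeline and a hypothetical @caller:
;
;   define i32 @caller() {                      ; before inlining
;   entry:
;     %res = call i32 @normal_callee()
;     ret i32 %res
;   }
;
;   define i32 @caller() {                      ; after inlining
;   entry:
;     %res.i = call i32 @llvm.vscale.i32()
;     ret i32 %res.i
;   }
; )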
-;
 ; Now test that inlining only happens when their streaming modes match.
 ; Test for a number of combinations, where:
 ; N   Normal-interface (PSTATE.SM=0 on entry/exit)
@@ -57,12 +86,16 @@ entry:
 ; [ ] N -> SC
 ; [ ] N -> N + B
 ; [ ] N -> SC + B
-define void @normal_caller_normal_callee_inline() {
-; CHECK-LABEL: @normal_caller_normal_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @normal_caller_normal_callee_inline() {
+; CHECK-LABEL: define i32 @normal_caller_normal_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @normal_callee()
-  ret void
+  %res = call i32 @normal_callee()
+  ret i32 %res
 }
 
 ; [ ] N -> N
@@ -70,12 +103,16 @@ entry:
 ; [ ] N -> SC
 ; [ ] N -> N + B
 ; [ ] N -> SC + B
-define void @normal_caller_streaming_callee_dont_inline() {
-; CHECK-LABEL: @normal_caller_streaming_callee_dont_inline(
-; CHECK: call void @streaming_callee()
+define i32 @normal_caller_streaming_callee_dont_inline() {
+; CHECK-LABEL: define i32 @normal_caller_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @streaming_callee()
-  ret void
+  %res = call i32 @streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] N -> N
@@ -83,12 +120,16 @@ entry:
 ; [x] N -> SC
 ; [ ] N -> N + B
 ; [ ] N -> SC + B
-define void @normal_caller_streaming_compatible_callee_inline() {
-; CHECK-LABEL: @normal_caller_streaming_compatible_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @normal_caller_streaming_compatible_callee_inline() {
+; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_callee()
-  ret void
+  %res = call i32 @streaming_compatible_callee()
+  ret i32 %res
 }
 
 ; [ ] N -> N
@@ -96,12 +137,16 @@ entry:
 ; [ ] N -> SC
 ; [x] N -> N + B
 ; [ ] N -> SC + B
-define void @normal_caller_locally_streaming_callee_dont_inline() {
-; CHECK-LABEL: @normal_caller_locally_streaming_callee_dont_inline(
-; CHECK: call void @locally_streaming_callee()
+define i32 @normal_caller_locally_streaming_callee_dont_inline() {
+; CHECK-LABEL: define i32 @normal_caller_locally_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @locally_streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @locally_streaming_callee()
-  ret void
+  %res = call i32 @locally_streaming_callee()
  ret i32 %res
 }
 
 ; [ ] N -> N
@@ -109,12 +154,16 @@ entry:
 ; [ ] N -> SC
 ; [ ] N -> N + B
 ; [x] N -> SC + B
-define void @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() {
-; CHECK-LABEL: @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline(
-; CHECK: call void @streaming_compatible_locally_streaming_callee()
+define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() {
+; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @streaming_compatible_locally_streaming_callee()
-  ret void
+  %res = call i32 @streaming_compatible_locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [x] S -> N
@@ -122,12 +171,16 @@ entry:
 ; [ ] S -> SC
 ; [ ] S -> N + B
 ; [ ] S -> SC + B
-define void @streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @streaming_caller_normal_callee_dont_inline(
-; CHECK: call void @normal_callee()
+define i32 @streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_caller_normal_callee_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @normal_callee()
-  ret void
+  %res = call i32 @normal_callee()
+  ret i32 %res
 }
 
 ; [ ] S -> N
@@ -135,12 +188,16 @@ entry:
 ; [ ] S -> SC
 ; [ ] S -> N + B
 ; [ ] S -> SC + B
-define void @streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @streaming_caller_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_caller_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_callee()
-  ret void
+  %res = call i32 @streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] S -> N
@@ -148,12 +205,16 @@ entry:
 ; [x] S -> SC
 ; [ ] S -> N + B
 ; [ ] S -> SC + B
-define void @streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @streaming_caller_streaming_compatible_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_callee()
-  ret void
+  %res = call i32 @streaming_compatible_callee()
+  ret i32 %res
 }
 
 ; [ ] S -> N
@@ -161,12 +222,16 @@ entry:
 ; [ ] S -> SC
 ; [x] S -> N + B
 ; [ ] S -> SC + B
-define void @streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @streaming_caller_locally_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_caller_locally_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @locally_streaming_callee()
-  ret void
+  %res = call i32 @locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] S -> N
@@ -174,12 +239,16 @@ entry:
 ; [ ] S -> SC
 ; [ ] S -> N + B
 ; [x] S -> SC + B
-define void @streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: @streaming_caller_streaming_compatible_locally_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_locally_streaming_callee()
-  ret void
+  %res = call i32 @streaming_compatible_locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [x] N + B -> N
@@ -187,12 +256,16 @@ entry:
 ; [ ] N + B -> SC
 ; [ ] N + B -> N + B
 ; [ ] N + B -> SC + B
-define void @locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @locally_streaming_caller_normal_callee_dont_inline(
-; CHECK: call void @normal_callee()
+define i32 @locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_caller_normal_callee_dont_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @normal_callee()
-  ret void
+  %res = call i32 @normal_callee()
+  ret i32 %res
 }
 
 ; [ ] N + B -> N
@@ -200,12 +273,16 @@ entry:
 ; [ ] N + B -> SC
 ; [ ] N + B -> N + B
 ; [ ] N + B -> SC + B
-define void @locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @locally_streaming_caller_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_callee()
-  ret void
+  %res = call i32 @streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] N + B -> N
@@ -213,12 +290,16 @@ entry:
 ; [x] N + B -> SC
 ; [ ] N + B -> N + B
 ; [ ] N + B -> SC + B
-define void @locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_callee()
-  ret void
+  %res = call i32 @streaming_compatible_callee()
+  ret i32 %res
 }
 
 ; [ ] N + B -> N
@@ -226,12 +307,16 @@ entry:
 ; [ ] N + B -> SC
 ; [x] N + B -> N + B
 ; [ ] N + B -> SC + B
-define void @locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @locally_streaming_caller_locally_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_caller_locally_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @locally_streaming_callee()
-  ret void
+  %res = call i32 @locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] N + B -> N
@@ -239,12 +324,16 @@ entry:
 ; [ ] N + B -> SC
 ; [ ] N + B -> N + B
 ; [x] N + B -> SC + B
-define void @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_locally_streaming_callee()
-  ret void
+  %res = call i32 @streaming_compatible_locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [x] SC -> N
@@ -252,12 +341,16 @@ entry:
 ; [ ] SC -> SC
 ; [ ] SC -> N + B
 ; [ ] SC -> SC + B
-define void @streaming_compatible_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: @streaming_compatible_caller_normal_callee_dont_inline(
-; CHECK: call void @normal_callee()
+define i32 @streaming_compatible_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_caller_normal_callee_dont_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @normal_callee()
-  ret void
+  %res = call i32 @normal_callee()
+  ret i32 %res
 }
 
 ; [ ] SC -> N
@@ -265,12 +358,16 @@ entry:
 ; [ ] SC -> SC
 ; [ ] SC -> N + B
 ; [ ] SC -> SC + B
-define void @streaming_compatible_caller_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: @streaming_compatible_caller_streaming_callee_dont_inline(
-; CHECK: call void @streaming_callee()
+define i32 @streaming_compatible_caller_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @streaming_callee()
-  ret void
+  %res = call i32 @streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] SC -> N
@@ -278,12 +375,16 @@ entry:
 ; [x] SC -> SC
 ; [ ] SC -> N + B
 ; [ ] SC -> SC + B
-define void @streaming_compatible_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_compatible_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_callee()
-  ret void
+  %res = call i32 @streaming_compatible_callee()
+  ret i32 %res
 }
 
 ; [ ] SC -> N
@@ -291,12 +392,16 @@ entry:
 ; [ ] SC -> SC
 ; [x] SC -> N + B
 ; [ ] SC -> SC + B
-define void @streaming_compatible_caller_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: @streaming_compatible_caller_locally_streaming_callee_dont_inline(
-; CHECK: call void @locally_streaming_callee()
+define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @locally_streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @locally_streaming_callee()
-  ret void
+  %res = call i32 @locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] SC -> N
@@ -304,24 +409,32 @@ entry:
 ; [ ] SC -> SC
 ; [ ] SC -> N + B
 ; [x] SC -> SC + B
-define void @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
-; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline(
-; CHECK: call void @streaming_compatible_locally_streaming_callee()
+define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @streaming_compatible_locally_streaming_callee()
-  ret void
+  %res = call i32 @streaming_compatible_locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [x] SC + B -> N
 ; [ ] SC + B -> S
 ; [ ] SC + B -> SC
 ; [ ] SC + B -> N + B
 ; [ ] SC + B -> SC + B
-define void @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline(
-; CHECK: call void @normal_callee()
+define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @normal_callee()
+; CHECK-NEXT:    ret i32 [[RES]]
+;
 entry:
-  call void @normal_callee()
-  ret void
+  %res = call i32 @normal_callee()
+  ret i32 %res
 }
 
 ; [ ] SC + B -> N
@@ -329,12 +442,16 @@ entry:
 ; [ ] SC + B -> SC
 ; [ ] SC + B -> N + B
 ; [ ] SC + B -> SC + B
-define void @streaming_compatible_locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_callee()
-  ret void
+  %res = call i32 @streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] SC + B -> N
@@ -342,12 +459,16 @@ entry:
 ; [x] SC + B -> SC
 ; [ ] SC + B -> N + B
 ; [ ] SC + B -> SC + B
-define void @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_callee()
-  ret void
+  %res = call i32 @streaming_compatible_callee()
+  ret i32 %res
 }
 
 ; [ ] SC + B -> N
@@ -355,12 +476,16 @@ entry:
 ; [ ] SC + B -> SC
 ; [x] SC + B -> N + B
 ; [ ] SC + B -> SC + B
-define void @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @locally_streaming_callee()
-  ret void
+  %res = call i32 @locally_streaming_callee()
+  ret i32 %res
 }
 
 ; [ ] SC + B -> N
@@ -368,10 +493,92 @@ entry:
 ; [ ] SC + B -> SC
 ; [ ] SC + B -> N + B
 ; [x] SC + B -> SC + B
-define void @streaming_compatible_locally_streaming_caller_and_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_and_callee_inline(
-; CHECK: call void @inlined_body()
+define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    ret i32 [[RES_I]]
+;
 entry:
-  call void @streaming_compatible_locally_streaming_callee()
+  %res = call i32 @streaming_compatible_locally_streaming_callee()
+  ret i32 %res
+}
+
+define void @normal_callee_with_inlineasm() {
+; CHECK-LABEL: define void @normal_callee_with_inlineasm
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void asm sideeffect "; inlineasm", ""()
   ret void
 }
+
+define void @streaming_caller_normal_callee_with_inlineasm_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @normal_callee_with_inlineasm()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @normal_callee_with_inlineasm()
+  ret void
+}
+
+define i64 @normal_callee_with_intrinsic_call() {
+; CHECK-LABEL: define i64 @normal_callee_with_intrinsic_call
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4)
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  %res = call i64 @llvm.aarch64.sve.cntb(i32 4)
+  ret i64 %res
+}
+
+define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @normal_callee_with_intrinsic_call()
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  %res = call i64 @normal_callee_with_intrinsic_call()
+  ret i64 %res
+}
+
+declare i64 @llvm.aarch64.sve.cntb(i32)
+
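;
; (Illustration, not part of the test: the "dont_inline" cases in this group
; rest on the assumption that a callee body is only safe to inline if it
; behaves the same under the caller's streaming mode. Inline asm is opaque to
; that check, and an intrinsic such as llvm.aarch64.sve.cntb can return a
; different value once PSTATE.SM changes the effective vector length, so the
; inliner is conservative and leaves such calls in place. The same reasoning
; is assumed for the SME support-routine cases below, since routines like
; __arm_sme_state read the current streaming state directly.)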
+define i64 @normal_callee_call_sme_state() {
+; CHECK-LABEL: define i64 @normal_callee_call_sme_state
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call { i64, i64 } @__arm_sme_state()
+; CHECK-NEXT:    [[RES_0:%.*]] = extractvalue { i64, i64 } [[RES]], 0
+; CHECK-NEXT:    ret i64 [[RES_0]]
+;
+entry:
+  %res = call {i64, i64} @__arm_sme_state()
+  %res.0 = extractvalue {i64, i64} %res, 0
  ret i64 %res.0
+}
+
+declare {i64, i64} @__arm_sme_state()
+
+define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline() "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @normal_callee_call_sme_state()
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  %res = call i64 @normal_callee_call_sme_state()
+  ret i64 %res
+}
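;
; (Illustration, not part of the patch: the ZA-state tests that follow use
; the renamed attributes. As a minimal sketch of the naming scheme, a
; function that sets up a fresh ZA context is marked "aarch64_new_za", while
; one that reads and updates its caller's ZA state is marked
; "aarch64_inout_za". The function names here are hypothetical:
;
;   declare void @setup_fresh_za() "aarch64_new_za"
;   declare void @update_callers_za() "aarch64_inout_za"
; )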
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
index bdc9e637fe90c2d1f12c5da0811925ee0c940bf4..816492768cc0f5a146777735b3282080815d39bc 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -passes=inline < %s | FileCheck %s
 
 declare void @inlined_body()
@@ -10,18 +11,35 @@ declare void @inlined_body()
 ;
 
 define void @nonza_callee() {
+; CHECK-LABEL: define void @nonza_callee
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @inlined_body()
   ret void
 }
 
-define void @shared_za_callee() "aarch64_pstate_za_shared" {
+define void @shared_za_callee() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_callee
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @inlined_body()
   ret void
 }
 
-define void @new_za_callee() "aarch64_pstate_za_new" {
+define void @new_za_callee() "aarch64_new_za" {
+; CHECK-LABEL: define void @new_za_callee
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
   call void @inlined_body()
   ret void
 }
@@ -37,8 +55,12 @@ define void @new_za_callee() "aarch64_pstate_za_new"
 ; [ ] N -> S (This combination is invalid)
 ; [ ] N -> Z
 define void @nonza_caller_nonza_callee_inline() {
-; CHECK-LABEL: @nonza_caller_nonza_callee_inline(
-; CHECK: call void @inlined_body()
+; CHECK-LABEL: define void @nonza_caller_nonza_callee_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @nonza_callee()
   ret void
@@ -48,8 +70,12 @@ entry:
 ; [ ] N -> S (This combination is invalid)
 ; [x] N -> Z
 define void @nonza_caller_new_za_callee_dont_inline() {
-; CHECK-LABEL: @nonza_caller_new_za_callee_dont_inline(
-; CHECK: call void @new_za_callee()
+; CHECK-LABEL: define void @nonza_caller_new_za_callee_dont_inline
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @new_za_callee()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @new_za_callee()
   ret void
@@ -58,9 +84,13 @@ entry:
 ; [x] Z -> N
 ; [ ] Z -> S
 ; [ ] Z -> Z
-define void @new_za_caller_nonza_callee_dont_inline() "aarch64_pstate_za_new" {
-; CHECK-LABEL: @new_za_caller_nonza_callee_dont_inline(
-; CHECK: call void @nonza_callee()
+define void @new_za_caller_nonza_callee_inline() "aarch64_new_za" {
+; CHECK-LABEL: define void @new_za_caller_nonza_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @nonza_callee()
   ret void
@@ -69,9 +99,13 @@ entry:
 ; [ ] Z -> N
 ; [x] Z -> S
 ; [ ] Z -> Z
-define void @new_za_caller_shared_za_callee_inline() "aarch64_pstate_za_new" {
-; CHECK-LABEL: @new_za_caller_shared_za_callee_inline(
-; CHECK: call void @inlined_body()
+define void @new_za_caller_shared_za_callee_inline() "aarch64_new_za" {
+; CHECK-LABEL: define void @new_za_caller_shared_za_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @shared_za_callee()
   ret void
@@ -80,9 +114,13 @@ entry:
 ; [ ] Z -> N
 ; [ ] Z -> S
 ; [x] Z -> Z
-define void @new_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_new" {
-; CHECK-LABEL: @new_za_caller_new_za_callee_dont_inline(
-; CHECK: call void @new_za_callee()
+define void @new_za_caller_new_za_callee_dont_inline() "aarch64_new_za" {
+; CHECK-LABEL: define void @new_za_caller_new_za_callee_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @new_za_callee()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @new_za_callee()
   ret void
@@ -91,9 +129,13 @@ entry:
 ; [x] Z -> N
 ; [ ] Z -> S
 ; [ ] Z -> Z
-define void @shared_za_caller_nonza_callee_dont_inline() "aarch64_pstate_za_shared" {
-; CHECK-LABEL: @shared_za_caller_nonza_callee_dont_inline(
-; CHECK: call void @nonza_callee()
+define void @shared_za_caller_nonza_callee_inline() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_nonza_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @nonza_callee()
   ret void
@@ -102,9 +144,13 @@ entry:
 ; [ ] S -> N
 ; [x] S -> Z
 ; [ ] S -> S
-define void @shared_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_shared" {
-; CHECK-LABEL: @shared_za_caller_new_za_callee_dont_inline(
-; CHECK: call void @new_za_callee()
+define void @shared_za_caller_new_za_callee_dont_inline() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_new_za_callee_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @new_za_callee()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @new_za_callee()
   ret void
@@ -113,10 +159,78 @@ entry:
 ; [ ] S -> N
 ; [ ] S -> Z
 ; [x] S -> S
-define void @shared_za_caller_shared_za_callee_inline() "aarch64_pstate_za_shared" {
-; CHECK-LABEL: @shared_za_caller_shared_za_callee_inline(
-; CHECK: call void @inlined_body()
+define void @shared_za_caller_shared_za_callee_inline() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_shared_za_callee_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
 entry:
   call void @shared_za_callee()
   ret void
 }
+
+define void @private_za_callee_call_za_disable() {
+; CHECK-LABEL: define void @private_za_callee_call_za_disable
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:    call void @__arm_za_disable()
+; CHECK-NEXT:    ret void
+;
+  call void @__arm_za_disable()
+  ret void
+}
+
+define void @shared_za_caller_private_za_callee_call_za_disable() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_private_za_callee_call_za_disable
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @private_za_callee_call_za_disable()
+; CHECK-NEXT:    ret void
+;
+  call void @private_za_callee_call_za_disable()
+  ret void
+}
+
+define void @private_za_callee_call_tpidr2_save() {
+; CHECK-LABEL: define void @private_za_callee_call_tpidr2_save
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:    call void @__arm_tpidr2_save()
+; CHECK-NEXT:    ret void
+;
+  call void @__arm_tpidr2_save()
+  ret void
+}
+
+define void @shared_za_caller_private_za_callee_call_tpidr2_save_dont_inline() "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_private_za_callee_call_tpidr2_save_dont_inline
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    call void @private_za_callee_call_tpidr2_save()
+; CHECK-NEXT:    ret void
+;
+  call void @private_za_callee_call_tpidr2_save()
+  ret void
+}
+
+define void @private_za_callee_call_tpidr2_restore(ptr %ptr) {
+; CHECK-LABEL: define void @private_za_callee_call_tpidr2_restore
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @__arm_tpidr2_restore(ptr [[PTR]])
+; CHECK-NEXT:    ret void
+;
+  call void @__arm_tpidr2_restore(ptr %ptr)
+  ret void
+}
+
+define void @shared_za_caller_private_za_callee_call_tpidr2_restore_dont_inline(ptr %ptr) "aarch64_inout_za" {
+; CHECK-LABEL: define void @shared_za_caller_private_za_callee_call_tpidr2_restore_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    call void @private_za_callee_call_tpidr2_restore(ptr [[PTR]])
+; CHECK-NEXT:    ret void
+;
+  call void @private_za_callee_call_tpidr2_restore(ptr %ptr)
+  ret void
+}
+
+declare void @__arm_za_disable()
+declare void @__arm_tpidr2_save()
+declare void @__arm_tpidr2_restore(ptr)
diff --git a/llvm/test/Verifier/sme-attributes.ll b/llvm/test/Verifier/sme-attributes.ll
index 7f788cfd09f07276277483447ed0120c9c4aaddf..c992cd7adcd88c02953a6098aed1b21c03ef1dc9 100644
--- a/llvm/test/Verifier/sme-attributes.ll
+++ b/llvm/test/Verifier/sme-attributes.ll
@@ -3,8 +3,32 @@
 declare void @sm_attrs() "aarch64_pstate_sm_enabled" "aarch64_pstate_sm_compatible";
 ; CHECK: Attributes 'aarch64_pstate_sm_enabled and aarch64_pstate_sm_compatible' are incompatible!
 
-declare void @za_preserved() "aarch64_pstate_za_new" "aarch64_pstate_za_preserved";
-; CHECK: Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_preserved' are incompatible!
+declare void @za_new_preserved() "aarch64_new_za" "aarch64_preserves_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
 
-declare void @za_shared() "aarch64_pstate_za_new" "aarch64_pstate_za_shared";
-; CHECK: Attributes 'aarch64_pstate_za_new and aarch64_pstate_za_shared' are incompatible!
+declare void @za_new_in() "aarch64_new_za" "aarch64_in_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_new_inout() "aarch64_new_za" "aarch64_inout_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_new_out() "aarch64_new_za" "aarch64_out_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_preserved_in() "aarch64_preserves_za" "aarch64_in_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_preserved_inout() "aarch64_preserves_za" "aarch64_inout_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_preserved_out() "aarch64_preserves_za" "aarch64_out_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_in_inout() "aarch64_in_za" "aarch64_inout_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_in_out() "aarch64_in_za" "aarch64_out_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
+
+declare void @za_inout_out() "aarch64_inout_za" "aarch64_out_za";
+; CHECK: Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', 'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive
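;
; (Illustration, not part of the test: the cases above pair two ZA-state
; markers and expect the verifier to reject each pair. The accepted form is
; exactly one marker per function; these hypothetical declarations would
; verify cleanly:
;
;   declare void @za_in_only() "aarch64_in_za"
;   declare void @za_preserves_only() "aarch64_preserves_za"
; )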
diff --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
index 233c75a138ae8051e682c7254d99eac7f7294831..ab96ee147a9ae5d79b531c2205d5cb24e2c692ae 100644
--- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
+++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
@@ -38,25 +38,25 @@ TEST(SMEAttributes, Constructors) {
                   ->getFunction("foo"))
                  .hasStreamingCompatibleInterface());
 
-  ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_pstate_za_shared\"")
-                  ->getFunction("foo"))
-                 .hasSharedZAInterface());
-
-  ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_pstate_za_new\"")
+  ASSERT_TRUE(
+      SA(*parseIR("declare void @foo() \"aarch64_in_za\"")->getFunction("foo"))
+          .isInZA());
+  ASSERT_TRUE(
+      SA(*parseIR("declare void @foo() \"aarch64_out_za\"")->getFunction("foo"))
+          .isOutZA());
+  ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_inout_za\"")
                   ->getFunction("foo"))
-                 .hasNewZAInterface());
-
-  ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_pstate_za_preserved\"")
+                  .isInOutZA());
+  ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_preserves_za\"")
                   ->getFunction("foo"))
-                 .preservesZA());
+                  .isPreservesZA());
+  ASSERT_TRUE(
+      SA(*parseIR("declare void @foo() \"aarch64_new_za\"")->getFunction("foo"))
+          .isNewZA());
 
   // Invalid combinations.
   EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible),
                      "SM_Enabled and SM_Compatible are mutually exclusive");
-  EXPECT_DEBUG_DEATH(SA(SA::ZA_New | SA::ZA_Shared),
-                     "ZA_New and ZA_Shared are mutually exclusive");
-  EXPECT_DEBUG_DEATH(SA(SA::ZA_New | SA::ZA_Preserved),
-                     "ZA_New and ZA_Preserved are mutually exclusive");
 
   // Test that the set() methods equally check validity.
   EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled).set(SA::SM_Compatible),
@@ -79,22 +79,69 @@ TEST(SMEAttributes, Basics) {
   ASSERT_TRUE(SA(SA::SM_Compatible | SA::SM_Body).hasStreamingBody());
   ASSERT_FALSE(SA(SA::SM_Compatible | SA::SM_Body).hasNonStreamingInterface());
 
-  // Test PSTATE.ZA interfaces.
-  ASSERT_FALSE(SA(SA::ZA_Shared).hasPrivateZAInterface());
-  ASSERT_TRUE(SA(SA::ZA_Shared).hasSharedZAInterface());
-  ASSERT_TRUE(SA(SA::ZA_Shared).hasZAState());
-  ASSERT_FALSE(SA(SA::ZA_Shared).preservesZA());
-  ASSERT_TRUE(SA(SA::ZA_Shared | SA::ZA_Preserved).preservesZA());
-
-  ASSERT_TRUE(SA(SA::ZA_New).hasPrivateZAInterface());
-  ASSERT_TRUE(SA(SA::ZA_New).hasNewZAInterface());
-  ASSERT_TRUE(SA(SA::ZA_New).hasZAState());
-  ASSERT_FALSE(SA(SA::ZA_New).preservesZA());
-
-  ASSERT_TRUE(SA(SA::Normal).hasPrivateZAInterface());
-  ASSERT_FALSE(SA(SA::Normal).hasNewZAInterface());
+  // Test ZA State interfaces
+  SA ZA_In = SA(SA::encodeZAState(SA::StateValue::In));
+  ASSERT_TRUE(ZA_In.isInZA());
+  ASSERT_FALSE(ZA_In.isOutZA());
+  ASSERT_FALSE(ZA_In.isInOutZA());
+  ASSERT_FALSE(ZA_In.isPreservesZA());
+  ASSERT_FALSE(ZA_In.isNewZA());
+  ASSERT_TRUE(ZA_In.sharesZA());
+  ASSERT_TRUE(ZA_In.hasZAState());
+  ASSERT_TRUE(ZA_In.hasSharedZAInterface());
+  ASSERT_FALSE(ZA_In.hasPrivateZAInterface());
+
+  SA ZA_Out = SA(SA::encodeZAState(SA::StateValue::Out));
+  ASSERT_TRUE(ZA_Out.isOutZA());
+  ASSERT_FALSE(ZA_Out.isInZA());
+  ASSERT_FALSE(ZA_Out.isInOutZA());
+  ASSERT_FALSE(ZA_Out.isPreservesZA());
+  ASSERT_FALSE(ZA_Out.isNewZA());
+  ASSERT_TRUE(ZA_Out.sharesZA());
+  ASSERT_TRUE(ZA_Out.hasZAState());
+  ASSERT_TRUE(ZA_Out.hasSharedZAInterface());
+  ASSERT_FALSE(ZA_Out.hasPrivateZAInterface());
+
+  SA ZA_InOut = SA(SA::encodeZAState(SA::StateValue::InOut));
+  ASSERT_TRUE(ZA_InOut.isInOutZA());
+  ASSERT_FALSE(ZA_InOut.isInZA());
+  ASSERT_FALSE(ZA_InOut.isOutZA());
+  ASSERT_FALSE(ZA_InOut.isPreservesZA());
+  ASSERT_FALSE(ZA_InOut.isNewZA());
+  ASSERT_TRUE(ZA_InOut.sharesZA());
+  ASSERT_TRUE(ZA_InOut.hasZAState());
+  ASSERT_TRUE(ZA_InOut.hasSharedZAInterface());
+  ASSERT_FALSE(ZA_InOut.hasPrivateZAInterface());
+
+  SA ZA_Preserved = SA(SA::encodeZAState(SA::StateValue::Preserved));
+  ASSERT_TRUE(ZA_Preserved.isPreservesZA());
+  ASSERT_FALSE(ZA_Preserved.isInZA());
+  ASSERT_FALSE(ZA_Preserved.isOutZA());
+  ASSERT_FALSE(ZA_Preserved.isInOutZA());
+  ASSERT_FALSE(ZA_Preserved.isNewZA());
+  ASSERT_TRUE(ZA_Preserved.sharesZA());
+  ASSERT_TRUE(ZA_Preserved.hasZAState());
+  ASSERT_TRUE(ZA_Preserved.hasSharedZAInterface());
+  ASSERT_FALSE(ZA_Preserved.hasPrivateZAInterface());
+
+  SA ZA_New = SA(SA::encodeZAState(SA::StateValue::New));
+  ASSERT_TRUE(ZA_New.isNewZA());
+  ASSERT_FALSE(ZA_New.isInZA());
+  ASSERT_FALSE(ZA_New.isOutZA());
+  ASSERT_FALSE(ZA_New.isInOutZA());
+  ASSERT_FALSE(ZA_New.isPreservesZA());
+  ASSERT_FALSE(ZA_New.sharesZA());
+  ASSERT_TRUE(ZA_New.hasZAState());
+  ASSERT_FALSE(ZA_New.hasSharedZAInterface());
+  ASSERT_TRUE(ZA_New.hasPrivateZAInterface());
+
+  ASSERT_FALSE(SA(SA::Normal).isInZA());
+  ASSERT_FALSE(SA(SA::Normal).isOutZA());
+  ASSERT_FALSE(SA(SA::Normal).isInOutZA());
+  ASSERT_FALSE(SA(SA::Normal).isPreservesZA());
+  ASSERT_FALSE(SA(SA::Normal).isNewZA());
+  ASSERT_FALSE(SA(SA::Normal).sharesZA());
   ASSERT_FALSE(SA(SA::Normal).hasZAState());
-  ASSERT_FALSE(SA(SA::Normal).preservesZA());
 }
 
 TEST(SMEAttributes, Transitions) {
@@ -102,86 +149,51 @@ TEST(SMEAttributes, Transitions) {
   ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal)));
   // Normal -> Normal + LocallyStreaming
   ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
-  ASSERT_EQ(*SA(SA::Normal)
-                 .requiresSMChange(SA(SA::Normal | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
 
   // Normal -> Streaming
-  ASSERT_EQ(*SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled)), true);
+  ASSERT_TRUE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled)));
   // Normal -> Streaming + LocallyStreaming
-  ASSERT_EQ(*SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)),
-            true);
-  ASSERT_EQ(*SA(SA::Normal)
-                 .requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
+  ASSERT_TRUE(
+      SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
   // Normal -> Streaming-compatible
   ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible)));
   // Normal -> Streaming-compatible + LocallyStreaming
   ASSERT_FALSE(
       SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
-  ASSERT_EQ(*SA(SA::Normal)
-                 .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
 
   // Streaming -> Normal
-  ASSERT_EQ(*SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal)), false);
+  ASSERT_TRUE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal)));
   // Streaming -> Normal + LocallyStreaming
-  ASSERT_EQ(*SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal | SA::SM_Body)),
-            false);
-  ASSERT_FALSE(SA(SA::SM_Enabled)
-                   .requiresSMChange(SA(SA::Normal | SA::SM_Body),
-                                     /*BodyOverridesInterface=*/true));
+  ASSERT_TRUE(
      SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
 
   // Streaming -> Streaming
   ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled)));
   // Streaming -> Streaming + LocallyStreaming
   ASSERT_FALSE(
       SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
-  ASSERT_FALSE(SA(SA::SM_Enabled)
-                   .requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body),
-                                     /*BodyOverridesInterface=*/true));
 
   // Streaming -> Streaming-compatible
   ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible)));
   // Streaming -> Streaming-compatible + LocallyStreaming
   ASSERT_FALSE(
       SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
-  ASSERT_FALSE(SA(SA::SM_Enabled)
-                   .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body),
-                                     /*BodyOverridesInterface=*/true));
 
   // Streaming-compatible -> Normal
-  ASSERT_EQ(*SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal)), false);
-  ASSERT_EQ(
-      *SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal | SA::SM_Body)),
-      false);
-  ASSERT_EQ(*SA(SA::SM_Compatible)
-                 .requiresSMChange(SA(SA::Normal | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
+  ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal)));
+  ASSERT_TRUE(
+      SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal | SA::SM_Body)));
 
   // Streaming-compatible -> Streaming
-  ASSERT_EQ(*SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled)), true);
+  ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled)));
   // Streaming-compatible -> Streaming + LocallyStreaming
-  ASSERT_EQ(
-      *SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)),
-      true);
-  ASSERT_EQ(*SA(SA::SM_Compatible)
-                 .requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
+  ASSERT_TRUE(
+      SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body)));
 
   // Streaming-compatible -> Streaming-compatible
   ASSERT_FALSE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Compatible)));
   // Streaming-compatible -> Streaming-compatible + LocallyStreaming
   ASSERT_FALSE(SA(SA::SM_Compatible)
                    .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body)));
-  ASSERT_EQ(*SA(SA::SM_Compatible)
-                 .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body),
-                                   /*BodyOverridesInterface=*/true),
-            true);
 }
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index bad86c2c984a75a5504eae3326fb84463d0561f0..e465ae7e00f376c0344606080c58ca67044bfad4 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -22,11 +22,11 @@ clang_tablegen("arm_sve") {
   output_name = "arm_sve.h"
 }
 
-# Generate arm_sme_draft_spec_subject_to_change.h
-clang_tablegen("arm_sme_draft_spec_subject_to_change") {
+# Generate arm_sme.h
+clang_tablegen("arm_sme") {
   args = [ "-gen-arm-sme-header" ]
   td_file = "//clang/include/clang/Basic/arm_sme.td"
-  output_name = "arm_sme_draft_spec_subject_to_change.h"
+  output_name = "arm_sme.h"
 }
 
 # Generate arm_bf16.h
@@ -65,7 +65,7 @@ copy("tablegen_headers") {
     ":arm_fp16",
     ":arm_mve",
    ":arm_neon",
-    ":arm_sme_draft_spec_subject_to_change",
+    ":arm_sme",
    ":arm_sve",
    ":riscv_vector",
  ]
diff --git a/mlir/test/Target/LLVMIR/arm-sme.mlir b/mlir/test/Target/LLVMIR/arm-sme.mlir
index 7beec1f61aa92363cea3709048aeaa97e60ea07d..ebda250bb30273b4e8d4ceb1c98ced61410525f7 100644
--- a/mlir/test/Target/LLVMIR/arm-sme.mlir
+++ b/mlir/test/Target/LLVMIR/arm-sme.mlir
@@ -222,7 +222,7 @@ llvm.func @arm_sme_store(%nxv1i1 : vector<[1]xi1>,
   "arm_sme.intr.st1b.vert"(%nxv16i1, %p8, %c0, %c0) : (vector<[16]xi1>, !llvm.ptr, i32, i32) -> ()
 
   // CHECK: call void @llvm.aarch64.sme.str
-  "arm_sme.intr.str"(%c0, %p8) : (i32, !llvm.ptr) -> ()
+  "arm_sme.intr.str"(%c0, %ptr, %c0) : (i32, !llvm.ptr, i32) -> ()
 
   llvm.return
 }
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 037719a51dd1e8be69fb3c4d68eae4eb2c29b2ef..56a4a72fece629e861cb86c76a4cd8f075bb38ae 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1625,14 +1625,14 @@ gentbl(
 )
 
 gentbl(
-    name = "headers_arm_sme_draft_spec_subject_to_change_gen",
+    name = "headers_arm_sme_gen",
     copts = [
         "-Wno-implicit-fallthrough",
         "-Wno-error=frame-larger-than=",
     ],
     tbl_outs = [(
         "-gen-arm-sme-header",
-        "lib/Headers/arm_sme_draft_spec_subject_to_change.h",
+        "lib/Headers/arm_sme.h",
     )],
     tblgen = ":clang-tblgen",
     td_file = "include/clang/Basic/arm_sme.td",
@@ -1673,7 +1673,7 @@ builtin_headers = glob(
         "lib/Headers/arm_mve.h",
         "lib/Headers/arm_neon.h",
         "lib/Headers/arm_sve.h",
-        "lib/Headers/arm_sme_draft_spec_subject_to_change.h",
+        "lib/Headers/arm_sme.h",
         "lib/Headers/arm_bf16.h",
         "lib/Headers/module.modulemap",
         "lib/Headers/riscv_vector.h",