From 086365be7ce0aaeb159fa80d02c426989d5115f3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 2 Oct 2022 00:47:10 -0700 Subject: [PATCH 01/41] [test] Make Linux/sem_init_glibc.cpp robust and fix it for 32-bit ports defining sem_init@GLIBC_2.0 (i386, mips32, powerpc32) for glibc>=2.36. Fix https://github.com/llvm/llvm-project/issues/58079 Reviewed By: mgorny Differential Revision: https://reviews.llvm.org/D135023 (cherry picked from commit 6f46ff3765dcdc178b9cf52ebd8c03437806798a) --- .../sanitizer_common_interceptors.inc | 2 +- .../TestCases/Linux/sem_init_glibc.cpp | 43 +++++++++---------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 9af296b1853a..b29665a63390 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -6703,7 +6703,7 @@ INTERCEPTOR(int, sem_init, __sanitizer_sem_t *s, int pshared, unsigned value) { COMMON_INTERCEPTOR_ENTER(ctx, sem_init, s, pshared, value); // Workaround a bug in glibc's "old" semaphore implementation by // zero-initializing the sem_t contents. This has to be done here because - // interceptors bind to the lowest symbols version by default, hitting the + // interceptors bind to the lowest version before glibc 2.36, hitting the // buggy code path while the non-sanitized build of the same code works fine. REAL(memset)(s, 0, sizeof(*s)); int res = REAL(sem_init)(s, pshared, value); diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp index d623ccabb5b5..234c5019f692 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp @@ -1,39 +1,36 @@ // RUN: %clangxx -O0 -g %s -lutil -o %t && %run %t // This test depends on the glibc layout of struct sem_t and checks that we // don't leave sem_t::private uninitialized. -// UNSUPPORTED: android, lsan-x86, ubsan, target-is-mips64, target-is-mips64el +// UNSUPPORTED: android, lsan-x86, ubsan #include #include #include #include #include -// On powerpc64be semval_t must be 64 bits even with "old" versions of glibc. -#if __PPC64__ && __BIG_ENDIAN__ -typedef uint64_t semval_t; - -// This condition needs to correspond to __HAVE_64B_ATOMICS macro in glibc. -#elif (defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__s390x__) || defined(__sparc64__) || defined(__alpha__) || \ - defined(__ia64__) || defined(__m68k__)) && __GLIBC_PREREQ(2, 21) -typedef uint64_t semval_t; -#else +// musl and glibc's __HAVE_64B_ATOMICS==0 ports (e.g. arm, i386) use 32-bit sem +// values. 64-bit glibc ports defining sem_init@GLIBC_2.0 (mips64) use 32-bit as +// well, if the sem_init interceptor picks the oldest versioned symbol +// (glibc<2.36, see https://sourceware.org/PR14932). +#if !defined(__GLIBC__) || defined(__ILP32__) || \ + !__GLIBC_PREREQ(2, 36) && defined(__mips64__) typedef unsigned semval_t; +#else +typedef uint64_t semval_t; #endif -// glibc 2.21 has introduced some changes in the way the semaphore value is -// handled for 32-bit platforms, but since these changes are not ABI-breaking -// they are not versioned. On newer platforms such as ARM, there is only one -// version of the symbol, so it's enough to check the glibc version. However, -// for old platforms such as i386, glibc contains two or even three versions of -// the sem_init symbol, and the sanitizers always pick the oldest one. -// Therefore, it is not enough to rely on the __GLIBC_PREREQ macro - we should -// instead check the platform as well to make sure we only expect the new -// behavior on platforms where the older symbols do not exist. -#if defined(__arm__) && __GLIBC_PREREQ(2, 21) -#define GET_SEM_VALUE(V) ((V) >> 1) +// glibc __HAVE_64B_ATOMICS==0 ports define a sem_init which shifts the value by +// 1 (https://sourceware.org/PR12674 glibc 2.21). The version is picked if +// either glibc>=2.36 or sem_init@GLIBC_2.0 is absent (arm and newer ports). +// +// The __GLIBC_PREREQ check is brittle in that it requires matched +// __GLIBC_PREREQ values for build time and run time. +#if defined(__GLIBC__) && defined(__ILP32__) && \ + (__GLIBC_PREREQ(2, 36) || (__GLIBC_PREREQ(2, 21) && !defined(__i386__) && \ + !defined(__mips__) && !defined(__powerpc__))) +# define GET_SEM_VALUE(V) ((V) >> 1) #else -#define GET_SEM_VALUE(V) (V) +# define GET_SEM_VALUE(V) (V) #endif void my_sem_init(bool priv, int value, semval_t *a, unsigned char *b) { -- Gitee From ceee53ce564c3034ea9636b3c416182efadc5967 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 17 Oct 2022 17:03:35 -0700 Subject: [PATCH 02/41] [SROA] Don't speculate phis with different load user types Fixes an SROA crash. Fallout from opaque pointers since with typed pointers we'd bail out at the bitcast. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D136119 (cherry picked from commit 6219ec07c6f8d1ead51beca7cf21fbf2323c51d7) --- llvm/lib/Transforms/Scalar/SROA.cpp | 19 +++++---- .../phi-speculate-different-load-types.ll | 41 +++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 143a035749c7..644c5c82e58e 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1210,8 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { BasicBlock *BB = PN.getParent(); Align MaxAlign; uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); - APInt MaxSize(APWidth, 0); - bool HaveLoad = false; + Type *LoadType = nullptr; for (User *U : PN.users()) { LoadInst *LI = dyn_cast(U); if (!LI || !LI->isSimple()) @@ -1223,21 +1222,27 @@ static bool isSafePHIToSpeculate(PHINode &PN) { if (LI->getParent() != BB) return false; + if (LoadType) { + if (LoadType != LI->getType()) + return false; + } else { + LoadType = LI->getType(); + } + // Ensure that there are no instructions between the PHI and the load that // could store. for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; - uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize(); MaxAlign = std::max(MaxAlign, LI->getAlign()); - MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; - HaveLoad = true; } - if (!HaveLoad) + if (!LoadType) return false; + APInt LoadSize = APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedSize()); + // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. @@ -1259,7 +1264,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI)) + if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI)) continue; return false; diff --git a/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll new file mode 100644 index 000000000000..7893da340736 --- /dev/null +++ b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=sroa < %s -S | FileCheck %s + +define void @f(i1 %i) { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[A1:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A2:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br i1 [[I:%.*]], label [[BB1:%.*]], label [[BB:%.*]] +; CHECK: bb: +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[A1]], [[BB1]] ], [ [[A2]], [[BB]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4 +; CHECK-NEXT: call void @use32(i32 [[TMP5]]) +; CHECK-NEXT: call void @use64(i64 [[TMP4]]) +; CHECK-NEXT: ret void +; + %a1 = alloca i64 + %a2 = alloca i64 + br i1 %i, label %bb1, label %bb + +bb: + br label %bb2 + +bb1: + br label %bb2 + +bb2: + %tmp3 = phi ptr [ %a1, %bb1 ], [ %a2, %bb ] + %tmp5 = load i32, ptr %tmp3 + %tmp4 = load i64, ptr %tmp3 + call void @use32(i32 %tmp5) + call void @use64(i64 %tmp4) + ret void +} + +declare void @use32(i32) +declare void @use64(i64) -- Gitee From 455e1d765ad6379c98a499917d16fc8d31da3b2a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sun, 23 Oct 2022 09:49:16 +0200 Subject: [PATCH 03/41] [InstCombine] Bail out of casting calls when a conversion from/to byval is involved. Fixes #58307 Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D135738 (cherry picked from commit 86e57e66da9380eaa90a9d0830d7f2c5fe87af99) --- .../InstCombine/InstCombineCalls.cpp | 4 +++ llvm/test/Transforms/InstCombine/byval.ll | 3 +- .../test/Transforms/InstCombine/cast-byval.ll | 31 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/cast-byval.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bc01d2ef7fe2..52596b30494f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3289,6 +3289,10 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (CallerPAL.hasParamAttr(i, Attribute::SwiftError)) return false; + if (CallerPAL.hasParamAttr(i, Attribute::ByVal) != + Callee->getAttributes().hasParamAttr(i, Attribute::ByVal)) + return false; // Cannot transform to or from byval. + // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) { diff --git a/llvm/test/Transforms/InstCombine/byval.ll b/llvm/test/Transforms/InstCombine/byval.ll index e62bbe21c806..45750869524b 100644 --- a/llvm/test/Transforms/InstCombine/byval.ll +++ b/llvm/test/Transforms/InstCombine/byval.ll @@ -7,8 +7,7 @@ declare void @add_byval_callee_2(double* byval(double)) define void @add_byval(i64* %in) { ; CHECK-LABEL: @add_byval( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[IN:%.*]] to double* -; CHECK-NEXT: call void @add_byval_callee(double* byval(double) [[TMP1]]) +; CHECK-NEXT: call void bitcast (void (double*)* @add_byval_callee to void (i64*)*)(i64* byval(i64) [[IN:%.*]]) ; CHECK-NEXT: ret void ; %tmp = bitcast void (double*)* @add_byval_callee to void (i64*)* diff --git a/llvm/test/Transforms/InstCombine/cast-byval.ll b/llvm/test/Transforms/InstCombine/cast-byval.ll new file mode 100644 index 000000000000..b3e3055837c2 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/cast-byval.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Check that function calls involving conversion from/to byval aren't transformed. +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +%Foo = type { i64 } +define i64 @foo (ptr byval(%Foo) %foo) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[FOO:%.*]], align 4 +; CHECK-NEXT: ret i64 [[TMP1]] +; + %1 = load i64, ptr %foo, align 4 + ret i64 %1 +} + +define i64 @bar(i64 %0) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @foo(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = tail call i64 @foo(i64 %0) + ret i64 %2 +} + +define i64 @qux(ptr byval(%Foo) %qux) { +; CHECK-LABEL: @qux( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @bar(ptr nonnull byval([[FOO:%.*]]) [[QUX:%.*]]) +; CHECK-NEXT: ret i64 [[TMP1]] +; + %1 = tail call i64 @bar(ptr byval(%Foo) %qux) + ret i64 %1 +} -- Gitee From 5388da13992c133de7ff510705564ae2d4b1a264 Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Mon, 24 Oct 2022 13:45:36 +0200 Subject: [PATCH 04/41] Bump version to 15.0.4 --- libcxx/include/__config | 2 +- llvm/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++--- .../clang/include/clang/Config/config.h | 2 +- utils/bazel/llvm-project-overlay/lld/BUILD.bazel | 2 +- .../llvm/include/llvm/Config/llvm-config.h | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 589b5c3b2241..810189c94a94 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -36,7 +36,7 @@ #ifdef __cplusplus -# define _LIBCPP_VERSION 15003 +# define _LIBCPP_VERSION 15004 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 1a45f2eff013..3d6f5e6f9d3d 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 3) + set(LLVM_VERSION_PATCH 4) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 6d64da0180df..3b890e00bec3 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 15 llvm_version_minor = 0 -llvm_version_patch = 3 +llvm_version_patch = 4 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 5cb7ccdc67da..04f6e94535e4 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (15, 0, 3) +__versioninfo__ = (15, 0, 4) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 6cb8889bf50f..96b462717be9 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -358,11 +358,11 @@ genrule( name = "basic_version_gen", outs = ["include/clang/Basic/Version.inc"], cmd = ( - "echo '#define CLANG_VERSION 15.0.3' >> $@\n" + + "echo '#define CLANG_VERSION 15.0.4' >> $@\n" + "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" + "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" + - "echo '#define CLANG_VERSION_PATCHLEVEL 3' >> $@\n" + - "echo '#define CLANG_VERSION_STRING \"15.0.3\"' >> $@\n" + "echo '#define CLANG_VERSION_PATCHLEVEL 4' >> $@\n" + + "echo '#define CLANG_VERSION_STRING \"15.0.4\"' >> $@\n" ), ) diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index cedeb0a2c951..5d157492eae3 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -93,7 +93,7 @@ /* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 15.0.3" +#define BACKEND_PACKAGE_STRING "LLVM 15.0.4" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index 139298d1533f..e3a8c98ffa22 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -13,7 +13,7 @@ package( genrule( name = "config_version_gen", outs = ["include/lld/Common/Version.inc"], - cmd = "echo '#define LLD_VERSION_STRING \"15.0.3\"' > $@", + cmd = "echo '#define LLD_VERSION_STRING \"15.0.4\"' > $@", ) genrule( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index 37bb4cf22a89..7a86202e8e0d 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -80,10 +80,10 @@ #define LLVM_VERSION_MINOR 0 /* Patch version of the LLVM API */ -#define LLVM_VERSION_PATCH 3 +#define LLVM_VERSION_PATCH 4 /* LLVM version string */ -#define LLVM_VERSION_STRING "15.0.3" +#define LLVM_VERSION_STRING "15.0.4" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() -- Gitee From 5834fe66318b55a54024fd93f69ab801a4c61d33 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Oct 2022 11:05:53 +0200 Subject: [PATCH 05/41] [AutoUpgrade] Fix remangling when upgrading struct return type This was remangling the old function rather than the new one, and could result in failures when we were performing both a struct return upgrade and an opaque pointer upgrade. (cherry picked from commit c8938809d155682ef5eec170897b8c26b8cbf3ea) --- llvm/lib/IR/AutoUpgrade.cpp | 2 +- .../opaque-ptr-intrinsic-remangling.ll | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 75594f90c926..b9962da1d302 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1040,7 +1040,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name, F->getParent()); // The new function may also need remangling. - if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F)) + if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn)) NewFn = *Result; return true; } diff --git a/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll b/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll index c885897f3155..04638b0ddac9 100644 --- a/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll +++ b/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll @@ -3,6 +3,8 @@ ; Make sure that opaque pointer intrinsic remangling upgrade works. +%int8x16x2_t = type { <16 x i8>, <16 x i8> } + declare i32* @fake_personality_function() declare void @func() @@ -43,5 +45,22 @@ define i8* @test_ptr_annotation(i8* %p) { ret i8* %p2 } + +define void @test_struct_return(%int8x16x2_t* %res.p, i8* %a) { +; CHECK-LABEL: @test_struct_return( +; CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertvalue [[INT8X16X2_T]] [[TMP3]], <16 x i8> [[TMP4]], 1 +; CHECK-NEXT: store [[INT8X16X2_T]] [[TMP5]], ptr [[RES_P:%.*]], align 16 +; CHECK-NEXT: ret void +; + %res = call %int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a) + store %int8x16x2_t %res, %int8x16x2_t* %res.p + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*) +declare %int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) -- Gitee From dccd0613025a6284e2a2296a56099b00a9dadcfe Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 21 Oct 2022 09:43:25 -0700 Subject: [PATCH 06/41] [ELF] Suppress "duplicate symbol" when resolving STB_WEAK and STB_GNU_UNIQUE in different COMDATs ``` template struct A { A() {} int value = 0; }; template struct B { static A a; }; template A B::a; inline int foo() { return B::a.value; } ``` ``` clang++ -c -fno-pic a.cc -o weak.o g++ -c -fno-pic a.cc -o unique.o # --enable-gnu-unique-object # Duplicate symbol error. In postParse, we do not check `sym.binding` ld.lld -e 0 weak.o unique.o ``` Mixing GCC and Clang object files in this case is not ideal. .bss._ZGVN1BIiE1aE has different COMDAT groups. It appears to work in practice because the guard variable prevents harm due to double initialization. For the linker, we just stick with the rule that a weak binding does not cause "duplicate symbol" errors. Close https://github.com/llvm/llvm-project/issues/58232 Differential Revision: https://reviews.llvm.org/D136381 (cherry picked from commit 0051b6bb78772b0658f28e5f31ddf91c1589aab5) --- lld/ELF/InputFiles.cpp | 2 +- lld/test/ELF/comdat-binding2.s | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/comdat-binding2.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 927dc272b532..473809b05e9c 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1157,7 +1157,7 @@ template void ObjFile::postParse() { continue; } - if (binding == STB_WEAK) + if (sym.binding == STB_WEAK || binding == STB_WEAK) continue; std::lock_guard lock(mu); ctx->duplicates.push_back({&sym, this, sec, eSym.st_value}); diff --git a/lld/test/ELF/comdat-binding2.s b/lld/test/ELF/comdat-binding2.s new file mode 100644 index 000000000000..3ffd7252836c --- /dev/null +++ b/lld/test/ELF/comdat-binding2.s @@ -0,0 +1,42 @@ +# REQUIRES: x86 +## Test we don't report duplicate definition errors when mixing Clang STB_WEAK +## and GCC STB_GNU_UNIQUE symbols. + +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 weak.s -o weak.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 unique.s -o unique.o +# RUN: ld.lld weak.o unique.o -o weak +# RUN: llvm-readelf -s weak | FileCheck %s --check-prefix=WEAK +# RUN: ld.lld unique.o weak.o -o unique +# RUN: llvm-readelf -s unique | FileCheck %s --check-prefix=UNIQUE + +# WEAK: OBJECT WEAK DEFAULT [[#]] _ZN1BIiE1aE +# UNIQUE: OBJECT UNIQUE DEFAULT [[#]] _ZN1BIiE1aE + +#--- weak.s +## Clang + .type _ZN1BIiE1aE,@object + .section .bss._ZN1BIiE1aE,"aGwR",@nobits,_ZN1BIiE1aE,comdat + .weak _ZN1BIiE1aE +_ZN1BIiE1aE: + .zero 4 + + .type _ZGVN1BIiE1aE,@object + .section .bss._ZGVN1BIiE1aE,"aGw",@nobits,_ZN1BIiE1aE,comdat + .weak _ZGVN1BIiE1aE +_ZGVN1BIiE1aE: + .quad 0 + +#--- unique.s +## GCC -fgnu-unique. Note the different group signature for the second group. + .weak _ZN1BIiE1aE + .section .bss._ZN1BIiE1aE,"awG",@nobits,_ZN1BIiE1aE,comdat + .type _ZN1BIiE1aE, @gnu_unique_object +_ZN1BIiE1aE: + .zero 4 + + .weak _ZGVN1BIiE1aE + .section .bss._ZGVN1BIiE1aE,"awG",@nobits,_ZGVN1BIiE1aE,comdat + .type _ZGVN1BIiE1aE, @gnu_unique_object +_ZGVN1BIiE1aE: + .zero 8 -- Gitee From 1e1c5204c25914951fbda55af5593056586e6b6f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 24 Oct 2022 14:48:32 -0700 Subject: [PATCH 07/41] [SystemZ] Relase notes for LLVM 15 Unfortunately these notes were compiled until now, but these are the release notes for SystemZ. (Did not find anything under clang) @tstellar Should I push this patch onto release/15.x once approved, or will you apply it? Differential Revision: https://reviews.llvm.org/D134430 --- llvm/docs/ReleaseNotes.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 9324d26cbdd8..0660bc134652 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -227,6 +227,20 @@ Changes to the WebAssembly Backend * ... +Changes to the SystemZ Backend +------------------------------ + +* Support z16 processor name. +* Machine scheduler description for z16. +* Add support for inline assembly address operands ("p") as well as for SystemZ + specific address operands ("ZQ", "ZR", "ZS" and "ZT"). +* Efficient handling of small memcpy/memset operations up to 32 bytes. +* Tuning of the inliner. +* Fixing emission of library calls so that narrow integer arguments are sign or + zero extended per the SystemZ ABI. +* Support added for libunwind. +* Various minor improvements and bugfixes. + Changes to the X86 Backend -------------------------- -- Gitee From 2d5c43ad484482cd23794b8b056a9b13ee920369 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Aug 2022 17:53:34 -0700 Subject: [PATCH 08/41] [lldb] Automatically unwrap parameter packs in template argument accessors When looking at template arguments in LLDB, we usually care about what the user passed in his code, not whether some of those arguments where passed as a variadic parameter pack. This patch extends all the C++ APIs to look at template parameters to take an additional 'expand_pack' boolean that automatically unwraps the potential argument packs. The equivalent SBAPI calls have been changed to pass true for this parameter. A byproduct of the patch is to also fix the support for template type that have only a parameter pack as argument (like the OnlyPack type in the test). Those were not recognized as template instanciations before. The added test verifies that the SBAPI is able to iterate over the arguments of a variadic template. The original patch was written by Fred Riss almost 4 years ago. Differential revision: https://reviews.llvm.org/D51387 (cherry picked from commit b706f56133a77f9d7c55270ac24ff59e6fce3fa4) --- lldb/include/lldb/API/SBType.h | 2 + lldb/include/lldb/Symbol/CompilerType.h | 24 ++++-- lldb/include/lldb/Symbol/TypeSystem.h | 14 ++-- lldb/source/API/SBType.cpp | 12 ++- .../TypeSystem/Clang/TypeSystemClang.cpp | 81 ++++++++++++++----- .../TypeSystem/Clang/TypeSystemClang.h | 19 ++--- lldb/source/Symbol/CompilerType.cpp | 18 +++-- lldb/source/Symbol/TypeSystem.cpp | 12 +-- .../TestTemplatePackArgs.py | 38 +++++++++ .../class-template-parameter-pack/main.cpp | 8 +- lldb/unittests/Symbol/TestTypeSystemClang.cpp | 30 ++++--- 11 files changed, 190 insertions(+), 68 deletions(-) create mode 100644 lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h index 244d328b51f4..aa45aeeec476 100644 --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -182,6 +182,8 @@ public: lldb::SBType GetTemplateArgumentType(uint32_t idx); + /// Return the TemplateArgumentKind of the template argument at index idx. + /// Variadic argument packs are automatically expanded. lldb::TemplateArgumentKind GetTemplateArgumentKind(uint32_t idx); lldb::SBType GetFunctionReturnType(); diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index 0ad05a27570e..aefd19d0a859 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -338,14 +338,28 @@ public: GetIndexOfChildMemberWithName(const char *name, bool omit_empty_base_classes, std::vector &child_indexes) const; - size_t GetNumTemplateArguments() const; - - lldb::TemplateArgumentKind GetTemplateArgumentKind(size_t idx) const; - CompilerType GetTypeTemplateArgument(size_t idx) const; + /// Return the number of template arguments the type has. + /// If expand_pack is true, then variadic argument packs are automatically + /// expanded to their supplied arguments. If it is false an argument pack + /// will only count as 1 argument. + size_t GetNumTemplateArguments(bool expand_pack = false) const; + + // Return the TemplateArgumentKind of the template argument at index idx. + // If expand_pack is true, then variadic argument packs are automatically + // expanded to their supplied arguments. With expand_pack set to false, an + // arguement pack will count as 1 argument and return a type of Pack. + lldb::TemplateArgumentKind + GetTemplateArgumentKind(size_t idx, bool expand_pack = false) const; + CompilerType GetTypeTemplateArgument(size_t idx, + bool expand_pack = false) const; /// Returns the value of the template argument and its type. + /// If expand_pack is true, then variadic argument packs are automatically + /// expanded to their supplied arguments. With expand_pack set to false, an + /// arguement pack will count as 1 argument and it is invalid to call this + /// method on the pack argument. llvm::Optional - GetIntegralTemplateArgument(size_t idx) const; + GetIntegralTemplateArgument(size_t idx, bool expand_pack = false) const; CompilerType GetTypeForFormatters() const; diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index be5783596897..769449a4933b 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -346,14 +346,18 @@ public: const char *name, bool omit_empty_base_classes, std::vector &child_indexes) = 0; - virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type); + virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack); virtual lldb::TemplateArgumentKind - GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx); - virtual CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx); + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); + virtual CompilerType + GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); virtual llvm::Optional - GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx); + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); // Dumping types diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index 533930c0544b..adc60a084367 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -542,7 +542,8 @@ uint32_t SBType::GetNumberOfTemplateArguments() { LLDB_INSTRUMENT_VA(this); if (IsValid()) - return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(); + return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments( + /*expand_pack=*/true); return 0; } @@ -553,13 +554,15 @@ lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) { return SBType(); CompilerType type; + const bool expand_pack = true; switch(GetTemplateArgumentKind(idx)) { case eTemplateArgumentKindType: - type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(idx); + type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument( + idx, expand_pack); break; case eTemplateArgumentKindIntegral: type = m_opaque_sp->GetCompilerType(false) - .GetIntegralTemplateArgument(idx) + .GetIntegralTemplateArgument(idx, expand_pack) ->type; break; default: @@ -574,7 +577,8 @@ lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) { LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) - return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx); + return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind( + idx, /*expand_pack=*/true); return eTemplateArgumentKindNull; } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index c6eb693bba6b..a1ebe5830bb9 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7096,7 +7096,8 @@ TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, } size_t -TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { +TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) { if (!type) return 0; @@ -7111,8 +7112,17 @@ TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { const clang::ClassTemplateSpecializationDecl *template_decl = llvm::dyn_cast( cxx_record_decl); - if (template_decl) - return template_decl->getTemplateArgs().size(); + if (template_decl) { + const auto &template_arg_list = template_decl->getTemplateArgs(); + size_t num_args = template_arg_list.size(); + assert(num_args && "template specialization without any args"); + if (expand_pack && num_args) { + const auto &pack = template_arg_list[num_args - 1]; + if (pack.getKind() == clang::TemplateArgument::Pack) + num_args += pack.pack_size() - 1; + } + return num_args; + } } } break; @@ -7149,15 +7159,51 @@ TypeSystemClang::GetAsTemplateSpecialization( } } +const TemplateArgument * +GetNthTemplateArgument(const clang::ClassTemplateSpecializationDecl *decl, + size_t idx, bool expand_pack) { + const auto &args = decl->getTemplateArgs(); + const size_t args_size = args.size(); + + assert(args_size && "template specialization without any args"); + if (!args_size) + return nullptr; + + const size_t last_idx = args_size - 1; + + // We're asked for a template argument that can't be a parameter pack, so + // return it without worrying about 'expand_pack'. + if (idx < last_idx) + return &args[idx]; + + // We're asked for the last template argument but we don't want/need to + // expand it. + if (!expand_pack || args[last_idx].getKind() != clang::TemplateArgument::Pack) + return idx >= args.size() ? nullptr : &args[idx]; + + // Index into the expanded pack. + // Note that 'idx' counts from the beginning of all template arguments + // (including the ones preceding the parameter pack). + const auto &pack = args[last_idx]; + const size_t pack_idx = idx - last_idx; + const size_t pack_size = pack.pack_size(); + assert(pack_idx < pack_size && "parameter pack index out-of-bounds"); + return &pack.pack_elements()[pack_idx]; +} + lldb::TemplateArgumentKind TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, - size_t arg_idx) { + size_t arg_idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (! template_decl || arg_idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) + return eTemplateArgumentKindNull; + + const auto *arg = GetNthTemplateArgument(template_decl, arg_idx, expand_pack); + if (!arg) return eTemplateArgumentKindNull; - switch (template_decl->getTemplateArgs()[arg_idx].getKind()) { + switch (arg->getKind()) { case clang::TemplateArgument::Null: return eTemplateArgumentKindNull; @@ -7190,35 +7236,32 @@ TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, CompilerType TypeSystemClang::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (!template_decl || idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) return CompilerType(); - const clang::TemplateArgument &template_arg = - template_decl->getTemplateArgs()[idx]; - if (template_arg.getKind() != clang::TemplateArgument::Type) + const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack); + if (!arg || arg->getKind() != clang::TemplateArgument::Type) return CompilerType(); - return GetType(template_arg.getAsType()); + return GetType(arg->getAsType()); } Optional TypeSystemClang::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (! template_decl || idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) return llvm::None; - const clang::TemplateArgument &template_arg = - template_decl->getTemplateArgs()[idx]; - if (template_arg.getKind() != clang::TemplateArgument::Integral) + const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack); + if (!arg || arg->getKind() != clang::TemplateArgument::Integral) return llvm::None; - return { - {template_arg.getAsIntegral(), GetType(template_arg.getIntegralType())}}; + return {{arg->getAsIntegral(), GetType(arg->getIntegralType())}}; } CompilerType TypeSystemClang::GetTypeForFormatters(void *type) { diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 24dbb71c8f4d..7f25a6df548f 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -91,7 +91,7 @@ public: void SetOwningModule(OptionalClangModuleID id); /// \} }; - + /// A TypeSystem implementation based on Clang. /// /// This class uses a single clang::ASTContext as the backend for storing @@ -334,7 +334,7 @@ public: llvm::SmallVector names; llvm::SmallVector args; - + const char * pack_name = nullptr; std::unique_ptr packed_args; }; @@ -537,7 +537,7 @@ public: #ifndef NDEBUG bool Verify(lldb::opaque_compiler_type_t type) override; #endif - + bool IsArrayType(lldb::opaque_compiler_type_t type, CompilerType *element_type, uint64_t *size, bool *is_incomplete) override; @@ -810,16 +810,17 @@ public: const char *name, bool omit_empty_base_classes, std::vector &child_indexes) override; - size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override; + size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) override; lldb::TemplateArgumentKind - GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, - size_t idx) override; + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack) override; CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) override; + size_t idx, bool expand_pack) override; llvm::Optional - GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) override; + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack) override; CompilerType GetTypeForFormatters(void *type) override; diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index ac98352c235e..bef456583687 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -659,30 +659,32 @@ size_t CompilerType::GetIndexOfChildMemberWithName( return 0; } -size_t CompilerType::GetNumTemplateArguments() const { +size_t CompilerType::GetNumTemplateArguments(bool expand_pack) const { if (IsValid()) { - return m_type_system->GetNumTemplateArguments(m_type); + return m_type_system->GetNumTemplateArguments(m_type, expand_pack); } return 0; } -TemplateArgumentKind CompilerType::GetTemplateArgumentKind(size_t idx) const { +TemplateArgumentKind +CompilerType::GetTemplateArgumentKind(size_t idx, bool expand_pack) const { if (IsValid()) - return m_type_system->GetTemplateArgumentKind(m_type, idx); + return m_type_system->GetTemplateArgumentKind(m_type, idx, expand_pack); return eTemplateArgumentKindNull; } -CompilerType CompilerType::GetTypeTemplateArgument(size_t idx) const { +CompilerType CompilerType::GetTypeTemplateArgument(size_t idx, + bool expand_pack) const { if (IsValid()) { - return m_type_system->GetTypeTemplateArgument(m_type, idx); + return m_type_system->GetTypeTemplateArgument(m_type, idx, expand_pack); } return CompilerType(); } llvm::Optional -CompilerType::GetIntegralTemplateArgument(size_t idx) const { +CompilerType::GetIntegralTemplateArgument(size_t idx, bool expand_pack) const { if (IsValid()) - return m_type_system->GetIntegralTemplateArgument(m_type, idx); + return m_type_system->GetIntegralTemplateArgument(m_type, idx, expand_pack); return llvm::None; } diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 3092dc0bf0a4..412373533aab 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -118,23 +118,25 @@ CompilerType TypeSystem::GetTypeForFormatters(void *type) { return CompilerType(this, type); } -size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { +size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) { return 0; } TemplateArgumentKind -TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx) { +TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx, + bool expand_pack) { return eTemplateArgumentKindNull; } CompilerType TypeSystem::GetTypeTemplateArgument(opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { return CompilerType(); } llvm::Optional -TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, - size_t idx) { +TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, size_t idx, + bool expand_pack) { return llvm::None; } diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py new file mode 100644 index 000000000000..180d3f503c4f --- /dev/null +++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py @@ -0,0 +1,38 @@ +""" +Test that the type of arguments to C++ template classes that have variadic +parameters can be enumerated. +""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TemplatePackArgsTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test_template_argument_pack(self): + self.build() + (_, _, thread, _) = lldbutil.run_to_source_breakpoint(self, + 'breakpoint here', lldb.SBFileSpec('main.cpp'), exe_name = 'a.out') + frame = thread.GetSelectedFrame() + + empty_pack = frame.FindVariable('emptyPack') + self.assertTrue(empty_pack.IsValid(), + 'make sure we find the emptyPack variable') + + only_pack = frame.FindVariable('onlyPack') + self.assertTrue(only_pack.IsValid(), + 'make sure we find the onlyPack variable') + self.assertEqual(only_pack.GetType().GetNumberOfTemplateArguments(), 4) + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(0).GetName(), 'int') + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(1).GetName(), 'char') + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(2).GetName(), 'double') + # Access the C template parameter. + nested_template = only_pack.GetType().GetTemplateArgumentType(3) + self.assertEqual(nested_template.GetName(), 'D') + self.assertEqual(nested_template.GetNumberOfTemplateArguments(), 3) + self.assertEqual(nested_template.GetTemplateArgumentType(0).GetName(), 'int') + self.assertEqual(nested_template.GetTemplateArgumentType(1).GetName(), 'int') + self.assertEqual(nested_template.GetTemplateArgumentType(2).GetName(), 'bool') diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp index 8bb0a42b58a3..26f8eab545c8 100644 --- a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp +++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp @@ -26,7 +26,13 @@ template <> struct D : D { bool argsAre_Int_bool() { return true; } }; +template struct OnlyPack {}; +template struct EmptyPack {}; + int main(int argc, char const *argv[]) { + EmptyPack emptyPack; + OnlyPack> onlyPack; + C myC; C myLesserC; myC.member = 64; @@ -34,7 +40,7 @@ int main(int argc, char const *argv[]) { (void)C().argsAre_16_32(); (void)(myC.member != 64); D myD; - D myLesserD; + D myLesserD; // breakpoint here myD.member = 64; (void)D().argsAre_Int_bool(); (void)D().argsAre_Int_bool(); diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp index e78a084f59f0..4da17cf3610c 100644 --- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp +++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp @@ -500,18 +500,24 @@ TEST_F(TestTypeSystemClang, TemplateArguments) { for (CompilerType t : {type, typedef_type, auto_type}) { SCOPED_TRACE(t.GetTypeName().AsCString()); - EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0), - eTemplateArgumentKindType); - EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0), - int_type); - EXPECT_EQ(llvm::None, - m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 0)); - - EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1), - eTemplateArgumentKindIntegral); - EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1), - CompilerType()); - auto result = m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1); + const bool expand_pack = false; + EXPECT_EQ( + m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0, expand_pack), + eTemplateArgumentKindType); + EXPECT_EQ( + m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0, expand_pack), + int_type); + EXPECT_EQ(llvm::None, m_ast->GetIntegralTemplateArgument( + t.GetOpaqueQualType(), 0, expand_pack)); + + EXPECT_EQ( + m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1, expand_pack), + eTemplateArgumentKindIntegral); + EXPECT_EQ( + m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1, expand_pack), + CompilerType()); + auto result = m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1, + expand_pack); ASSERT_NE(llvm::None, result); EXPECT_EQ(arg, result->value); EXPECT_EQ(int_type, result->type); -- Gitee From db68723804fd30d5e7da1fb2ad2aab409ef58d29 Mon Sep 17 00:00:00 2001 From: Tom Praschan <13141438+tom-anders@users.noreply.github.com> Date: Sun, 18 Sep 2022 18:48:11 +0200 Subject: [PATCH 09/41] [clangd] Return earlier when snippet is empty Fixes github.com/clangd/clangd/issues/1216 If the Snippet string is empty, Snippet.front() would trigger a crash. Move the Snippet->empty() check up a few lines to avoid this. Should not break any existing behavior. Differential Revision: https://reviews.llvm.org/D134137 (cherry picked from commit 60528c690a4c334d2a3a2c22eb97af9e67d7a91d) --- clang-tools-extra/clangd/CodeComplete.cpp | 5 +++-- .../clangd/unittests/CodeCompleteTests.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index edbb1722ae38..55982f41250f 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -486,6 +486,9 @@ private: // we need to complete 'forward<$1>($0)'. return "($0)"; + if (Snippet->empty()) + return ""; + bool MayHaveArgList = Completion.Kind == CompletionItemKind::Function || Completion.Kind == CompletionItemKind::Method || Completion.Kind == CompletionItemKind::Constructor || @@ -524,8 +527,6 @@ private: return *Snippet; // Replace argument snippets with a simplified pattern. - if (Snippet->empty()) - return ""; if (MayHaveArgList) { // Functions snippets can be of 2 types: // - containing only function arguments, e.g. diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 5050ab203b8d..079a4ec70623 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1014,6 +1014,23 @@ TEST(CodeCompleteTest, NoColonColonAtTheEnd) { EXPECT_THAT(Results.Completions, Not(Contains(labeled("clang::")))); } +TEST(CompletionTests, EmptySnippetDoesNotCrash) { + // See https://github.com/clangd/clangd/issues/1216 + auto Results = completions(R"cpp( + int main() { + auto w = [&](auto &&f) { return f(f); }; + auto f = w([&](auto &&f) { + return [&](auto &&n) { + if (n == 0) { + return 1; + } + return n * ^(f)(n - 1); + }; + })(10); + } + )cpp"); +} + TEST(CompletionTest, BacktrackCrashes) { // Sema calls code completion callbacks twice in these cases. auto Results = completions(R"cpp( -- Gitee From 3010b7e000006f680b6bf3141be988291fa1da41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Fri, 21 Oct 2022 11:36:37 +0200 Subject: [PATCH 10/41] [clang][driver] Remove dynamic gcc-toolset/devtoolset logic This breaks when the newest available devtoolset directory is not a complete toolset: https://github.com/llvm/llvm-project/issues/57843 Remove this again in favor or just adding the two new directories for devtoolset/gcc-toolset 12. This reverts commit 35aaf548237a4f213ba9d95de53b33c5ce1eadce. This reverts commit 9f97720268911abae2ad9d90e270358db234a1c1. Fixes https://github.com/llvm/llvm-project/issues/57843 Differential Revision: https://reviews.llvm.org/D136435 --- clang/lib/Driver/ToolChains/Gnu.cpp | 40 ++++------- clang/unittests/Driver/ToolChainTest.cpp | 92 ------------------------ 2 files changed, 15 insertions(+), 117 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f203cae1d329..665cdc3132fb 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2139,31 +2139,21 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // and gcc-toolsets. if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux && D.getVFS().exists("/opt/rh")) { - // Find the directory in /opt/rh/ starting with gcc-toolset-* or - // devtoolset-* with the highest version number and add that - // one to our prefixes. - std::string ChosenToolsetDir; - unsigned ChosenToolsetVersion = 0; - std::error_code EC; - for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC), - LE; - !EC && LI != LE; LI = LI.increment(EC)) { - StringRef ToolsetDir = llvm::sys::path::filename(LI->path()); - unsigned ToolsetVersion; - if ((!ToolsetDir.startswith("gcc-toolset-") && - !ToolsetDir.startswith("devtoolset-")) || - ToolsetDir.substr(ToolsetDir.rfind('-') + 1) - .getAsInteger(10, ToolsetVersion)) - continue; - - if (ToolsetVersion > ChosenToolsetVersion) { - ChosenToolsetVersion = ToolsetVersion; - ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str(); - } - } - - if (ChosenToolsetVersion > 0) - Prefixes.push_back(ChosenToolsetDir + "/root/usr"); + // TODO: We may want to remove this, since the functionality + // can be achieved using config files. + Prefixes.push_back("/opt/rh/gcc-toolset-12/root/usr"); + Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr"); + Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-12/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-11/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-10/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-6/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-4/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-3/root/usr"); + Prefixes.push_back("/opt/rh/devtoolset-2/root/usr"); } // Fall back to /usr which is used by most non-Solaris systems. diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index 64bc616523f0..c434dfcb3e86 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -18,7 +18,6 @@ #include "clang/Driver/Driver.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Host.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -570,95 +569,4 @@ TEST(DxcModeTest, ValidatorVersionValidation) { Diags.Clear(); DiagConsumer->clear(); } - -TEST(ToolChainTest, Toolsets) { - // Ignore this test on Windows hosts. - llvm::Triple Host(llvm::sys::getProcessTriple()); - if (Host.isOSWindows()) - GTEST_SKIP(); - - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - - // Check (newer) GCC toolset installation. - { - IntrusiveRefCntPtr InMemoryFileSystem( - new llvm::vfs::InMemoryFileSystem); - - // These should be ignored. - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-2", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--1", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - // File needed for GCC installation detection. - InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-12/root/usr/lib/gcc/" - "x86_64-redhat-linux/11/crtbegin.o", - 0, llvm::MemoryBuffer::getMemBuffer("\n")); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new SimpleDiagnosticConsumer); - Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, - "clang LLVM compiler", InMemoryFileSystem); - std::unique_ptr C( - TheDriver.BuildCompilation({"clang", "--gcc-toolchain="})); - ASSERT_TRUE(C); - std::string S; - { - llvm::raw_string_ostream OS(S); - C->getDefaultToolChain().printVerboseInfo(OS); - } - EXPECT_EQ("Found candidate GCC installation: " - "/opt/rh/gcc-toolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n" - "Selected GCC installation: " - "/opt/rh/gcc-toolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n" - "Candidate multilib: .;@m64\n" - "Selected multilib: .;@m64\n", - S); - } - - // And older devtoolset. - { - IntrusiveRefCntPtr InMemoryFileSystem( - new llvm::vfs::InMemoryFileSystem); - - // These should be ignored. - InMemoryFileSystem->addFile("/opt/rh/devtoolset-2", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset-", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset--", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - InMemoryFileSystem->addFile("/opt/rh/devtoolset--1", 0, - llvm::MemoryBuffer::getMemBuffer("\n")); - - // File needed for GCC installation detection. - InMemoryFileSystem->addFile("/opt/rh/devtoolset-12/root/usr/lib/gcc/" - "x86_64-redhat-linux/11/crtbegin.o", - 0, llvm::MemoryBuffer::getMemBuffer("\n")); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts, new SimpleDiagnosticConsumer); - Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags, - "clang LLVM compiler", InMemoryFileSystem); - std::unique_ptr C( - TheDriver.BuildCompilation({"clang", "--gcc-toolchain="})); - ASSERT_TRUE(C); - std::string S; - { - llvm::raw_string_ostream OS(S); - C->getDefaultToolChain().printVerboseInfo(OS); - } - EXPECT_EQ("Found candidate GCC installation: " - "/opt/rh/devtoolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n" - "Selected GCC installation: " - "/opt/rh/devtoolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n" - "Candidate multilib: .;@m64\n" - "Selected multilib: .;@m64\n", - S); - } -} - } // end anonymous namespace. -- Gitee From dd711a9391226189476f23f793fb98e244d52a4b Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 11 Oct 2022 23:50:46 -0400 Subject: [PATCH 11/41] [lld-macho] Canonicalize personality pointers in EH frames We already do this for personality pointers referenced from compact unwind entries; this patch extends that behavior to personalities referenced via EH frames as well. This reduces the number of distinct personalities we need in the final binary, and helps us avoid hitting the "too many personalities" error. I renamed `UnwindInfoSection::prepareRelocations()` to simply `prepare` since we now do some non-reloc-specific stuff within. Fixes #58277. Reviewed By: #lld-macho, oontvoo Differential Revision: https://reviews.llvm.org/D135728 (cherry picked from commit 7b45dfc6811a52ff4e9a6054dc276d70d77fddaf) --- lld/MachO/UnwindInfoSection.cpp | 35 ++++++++++++++--- lld/MachO/UnwindInfoSection.h | 2 +- lld/MachO/Writer.cpp | 2 +- lld/test/MachO/eh-frame-personality-dedup.s | 43 +++++++++++++++++++++ 4 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 lld/test/MachO/eh-frame-personality-dedup.s diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index ca6cbdfbb8bb..8f267251b7c0 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -158,7 +158,7 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection { public: UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {} uint64_t getSize() const override { return unwindInfoSize; } - void prepareRelocations() override; + void prepare() override; void finalize() override; void writeTo(uint8_t *buf) const override; @@ -166,6 +166,7 @@ private: void prepareRelocations(ConcatInputSection *); void relocateCompactUnwind(std::vector &); void encodePersonalities(); + Symbol *canonicalizePersonality(Symbol *); uint64_t unwindInfoSize = 0; std::vector symbolsVec; @@ -218,14 +219,24 @@ void UnwindInfoSection::addSymbol(const Defined *d) { } } -void UnwindInfoSectionImpl::prepareRelocations() { +void UnwindInfoSectionImpl::prepare() { // This iteration needs to be deterministic, since prepareRelocations may add // entries to the GOT. Hence the use of a MapVector for // UnwindInfoSection::symbols. for (const Defined *d : make_second_range(symbols)) - if (d->unwindEntry && - d->unwindEntry->getName() == section_names::compactUnwind) - prepareRelocations(d->unwindEntry); + if (d->unwindEntry) { + if (d->unwindEntry->getName() == section_names::compactUnwind) { + prepareRelocations(d->unwindEntry); + } else { + // We don't have to add entries to the GOT here because FDEs have + // explicit GOT relocations, so Writer::scanRelocations() will add those + // GOT entries. However, we still need to canonicalize the personality + // pointers (like prepareRelocations() does for CU entries) in order + // to avoid overflowing the 3-personality limit. + FDE &fde = cast(d->getFile())->fdes[d->unwindEntry]; + fde.personality = canonicalizePersonality(fde.personality); + } + } } // Compact unwind relocations have different semantics, so we handle them in a @@ -279,6 +290,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { continue; } + // Similar to canonicalizePersonality(), but we also register a GOT entry. if (auto *defined = dyn_cast(s)) { // Check if we have created a synthetic symbol at the same address. Symbol *&personality = @@ -291,6 +303,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { } continue; } + assert(isa(s)); in.got->addEntry(s); continue; @@ -320,6 +333,18 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { } } +Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) { + if (auto *defined = dyn_cast_or_null(personality)) { + // Check if we have created a synthetic symbol at the same address. + Symbol *&synth = personalityTable[{defined->isec, defined->value}]; + if (synth == nullptr) + synth = defined; + else if (synth != defined) + return synth; + } + return personality; +} + // We need to apply the relocations to the pre-link compact unwind section // before converting it to post-link form. There should only be absolute // relocations here: since we are not emitting the pre-link CU section, there diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h index c6b334731c75..f2bc3213a127 100644 --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -24,7 +24,7 @@ public: // section entirely. bool isNeeded() const override { return !allEntriesAreOmitted; } void addSymbol(const Defined *); - virtual void prepareRelocations() = 0; + virtual void prepare() = 0; protected: UnwindInfoSection(); diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 3c44a60f4be2..ce9672dd0b4f 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -675,7 +675,7 @@ void Writer::scanRelocations() { } } - in.unwindInfo->prepareRelocations(); + in.unwindInfo->prepare(); } void Writer::scanSymbols() { diff --git a/lld/test/MachO/eh-frame-personality-dedup.s b/lld/test/MachO/eh-frame-personality-dedup.s new file mode 100644 index 000000000000..b14ddb23465d --- /dev/null +++ b/lld/test/MachO/eh-frame-personality-dedup.s @@ -0,0 +1,43 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/eh-frame.s -o %t/eh-frame.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/cu.s -o %t/cu.o +# RUN: %lld -dylib %t/cu.o %t/eh-frame.o -o %t/out + +## Sanity check: we want our input to contain a section (and not symbol) +## relocation for the personality reference. +# RUN: llvm-readobj --relocations %t/cu.o | FileCheck %s --check-prefix=SECT-RELOC +# SECT-RELOC: Section __compact_unwind { +# SECT-RELOC-NEXT: __text +# SECT-RELOC-NEXT: __text +# SECT-RELOC-NEXT: } + +## Verify that the personality referenced via a symbol reloc in eh-frame.s gets +## dedup'ed with the personality referenced via a section reloc in cu.s. +# RUN: llvm-objdump --macho --unwind-info %t/out | FileCheck %s +# CHECK: Personality functions: (count = 1) + +#--- eh-frame.s +_fun: + .cfi_startproc + .cfi_personality 155, _my_personality + ## cfi_escape cannot be encoded in compact unwind + .cfi_escape 0 + ret + .cfi_endproc + +.subsections_via_symbols + +#--- cu.s +.globl _my_personality +_fun: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +_my_personality: + nop + +.subsections_via_symbols -- Gitee From 9d46557baa84ab4342707c119cf9c77c9148230e Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 26 Oct 2022 09:34:34 +0000 Subject: [PATCH 12/41] Take memset_inline into account in analyzeLoadFromClobberingMemInst This appeared in https://reviews.llvm.org/D126903#3884061 Differential Revision: https://reviews.llvm.org/D136752 (cherry picked from commit 1a726cfa83667585dd87a9955ed5e331cad45d18) --- llvm/lib/Transforms/Utils/VNCoercion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp index 42be67f3cfc0..264da2187754 100644 --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -356,9 +356,9 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, // If this is memset, we just need to see if the offset is valid in the size // of the memset.. - if (MI->getIntrinsicID() == Intrinsic::memset) { + if (const auto *memset_inst = dyn_cast(MI)) { if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) { - auto *CI = dyn_cast(cast(MI)->getValue()); + auto *CI = dyn_cast(memset_inst->getValue()); if (!CI || !CI->isZero()) return -1; } -- Gitee From 08bd84e8a6358eb412fcef279f8875e2d69a3374 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Tue, 18 Oct 2022 00:01:55 +0000 Subject: [PATCH 13/41] [SPARC] Make calls to function with big return values work Implement CanLowerReturn and associated CallingConv changes for SPARC/SPARC64. In particular, for SPARC64 there's new `RetCC_Sparc64_*` functions that handles the return case of the calling convention. It uses the same analysis as `CC_Sparc64_*` family of funtions, but fails if the return value doesn't fit into the return registers. This makes calls to functions with big return values converted to an sret function as expected, instead of crashing LLVM. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D132465 (cherry picked from commit d3fcbee10d893b9e01e563c3840414ba89283484) --- .../SelectionDAG/SelectionDAGBuilder.cpp | 1 + llvm/lib/Target/Sparc/SparcCallingConv.td | 10 +- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 61 ++++- llvm/lib/Target/Sparc/SparcISelLowering.h | 5 + llvm/test/CodeGen/SPARC/64abi.ll | 27 -- llvm/test/CodeGen/SPARC/bigreturn.ll | 254 ++++++++++++++++++ 6 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 llvm/test/CodeGen/SPARC/bigreturn.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 35650b9bd00e..ecdaef0442da 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9693,6 +9693,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; + CLI.getArgs()[0].IndirectType = CLI.RetTy; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td index e6d23f741ea5..8afd0a7fc09a 100644 --- a/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -125,10 +125,14 @@ def CC_Sparc64 : CallingConv<[ def RetCC_Sparc64 : CallingConv<[ // A single f32 return value always goes in %f0. The ABI doesn't specify what // happens to multiple f32 return values outside a struct. - CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>, + CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>, - // Otherwise, return values are passed exactly like arguments. - CCDelegateTo + // Otherwise, return values are passed exactly like arguments, except that + // returns that are too big to fit into the registers is passed as an sret + // instead. + CCIfInReg>>, + CCIfType<[i32], CCPromoteToType>, + CCCustom<"RetCC_Sparc64_Full"> ]>; // Callee-saved registers are handled by the register window mechanism. diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 2cb74e7709c7..f55675089102 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -101,9 +101,9 @@ static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT, } // Allocate a full-sized argument for the 64-bit ABI. -static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert((LocVT == MVT::f32 || LocVT == MVT::f128 || LocVT.getSizeInBits() == 64) && "Can't handle non-64 bits locations"); @@ -133,6 +133,11 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. + if (IsReturn) + return false; + // This argument goes on the stack in an 8-byte slot. // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to // the right-aligned float. The first 4 bytes of the stack slot are undefined. @@ -146,9 +151,9 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, // Allocate a half-sized argument for the 64-bit ABI. // // This is used when passing { float, int } structs by value in registers. -static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { +static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); unsigned Offset = State.AllocateStack(4, Align(4)); @@ -174,10 +179,43 @@ static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, return true; } + // Bail out if this is a return CC and we run out of registers to place + // values into. + if (IsReturn) + return false; + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + #include "SparcGenCallingConv.inc" // The calling conventions in SparcCallingConv.td are described in terms of the @@ -191,6 +229,15 @@ static unsigned toCallerWindow(unsigned Reg) { return Reg; } +bool SparcTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64 + : RetCC_Sparc32); +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1031,6 +1078,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { + assert(RVLocs[i].isRegLoc() && "Can only return in registers!"); if (RVLocs[i].getLocVT() == MVT::v2i32) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32); SDValue Lo = DAG.getCopyFromReg( @@ -1346,6 +1394,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); unsigned Reg = toCallerWindow(VA.getLocReg()); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index 2768bb20566a..16e4f2687054 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -144,6 +144,11 @@ namespace llvm { SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll index 6b181d8b3432..27865f718151 100644 --- a/llvm/test/CodeGen/SPARC/64abi.ll +++ b/llvm/test/CodeGen/SPARC/64abi.ll @@ -293,33 +293,6 @@ define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) { ret void } -; Structs up to 32 bytes in size can be returned in registers. -; CHECK-LABEL: ret_i64_pair: -; CHECK: ldx [%i2], %i0 -; CHECK: ldx [%i3], %i1 -define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { - %r1 = load i64, i64* %p - %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 - store i64 0, i64* %p - %r2 = load i64, i64* %q - %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 - ret { i64, i64 } %rv2 -} - -; CHECK-LABEL: call_ret_i64_pair: -; CHECK: call ret_i64_pair -; CHECK: stx %o0, [%i0] -; CHECK: stx %o1, [%i0] -define void @call_ret_i64_pair(i64* %i0) { - %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, - i64* undef, i64* undef) - %e0 = extractvalue { i64, i64 } %rv, 0 - store volatile i64 %e0, i64* %i0 - %e1 = extractvalue { i64, i64 } %rv, 1 - store i64 %e1, i64* %i0 - ret void -} - ; This is not a C struct, the i32 member uses 8 bytes, but the float only 4. ; CHECK-LABEL: ret_i32_float_pair: ; CHECK: ld [%i2], %i0 diff --git a/llvm/test/CodeGen/SPARC/bigreturn.ll b/llvm/test/CodeGen/SPARC/bigreturn.ll new file mode 100644 index 000000000000..25b4eeecadc0 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/bigreturn.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC %s +; RUN: llc < %s -mtriple=sparc64 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC64 %s + +;; Structs up to six registers in size can be returned in registers. +;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i32, i32 } @ret_i32_pair(i32 %a0, i32 %a1, i32* %p, i32* %q) { +; SPARC-LABEL: ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: ld [%i2], %i0 +; SPARC-NEXT: st %g0, [%i2] +; SPARC-NEXT: ld [%i3], %i1 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ld [%i2], %i0 +; SPARC64-NEXT: st %g0, [%i2] +; SPARC64-NEXT: ld [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i32, i32* %p + %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 0 + store i32 0, i32* %p + %r2 = load i32, i32* %q + %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 1 + ret { i32, i32 } %rv2 +} + +define void @call_ret_i32_pair(i32* %i0) { +; SPARC-LABEL: call_ret_i32_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i32_pair +; SPARC-NEXT: nop +; SPARC-NEXT: st %o0, [%i0] +; SPARC-NEXT: st %o1, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i32_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: st %o0, [%i0] +; SPARC64-NEXT: st %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i32, i32 } @ret_i32_pair(i32 undef, i32 undef, + i32* undef, i32* undef) + %e0 = extractvalue { i32, i32 } %rv, 0 + store volatile i32 %e0, i32* %i0 + %e1 = extractvalue { i32, i32 } %rv, 1 + store i32 %e1, i32* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i32] } @ret_i32_arr(i32 %input) + +define i32 @call_ret_i32_arr(i32 %0) { +; SPARC-LABEL: call_ret_i32_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -160, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -64, %i1 +; SPARC-NEXT: st %i1, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: call ret_i32_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 64 +; SPARC-NEXT: ld [%fp+-4], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i32_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -240, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1983, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i32_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ld [%fp+2043], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i32] } @ret_i32_arr(i32 %0) + %3 = extractvalue { [16 x i32] } %2, 0 + %4 = extractvalue [16 x i32] %3, 15 + ret i32 %4 +} + +;; Structs up to six registers in size can be returned in registers. +;; Note that the maximum return size and member placement is NOT +;; compatible with the C ABI - see SparcCallingConv.td. +define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) { +; SPARC-LABEL: ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: mov %g0, %i4 +; SPARC-NEXT: ldd [%i2], %i0 +; SPARC-NEXT: mov %i4, %i5 +; SPARC-NEXT: std %i4, [%i2] +; SPARC-NEXT: ldd [%i3], %i2 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: ldx [%i2], %i0 +; SPARC64-NEXT: stx %g0, [%i2] +; SPARC64-NEXT: ldx [%i3], %i1 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %r1 = load i64, i64* %p + %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0 + store i64 0, i64* %p + %r2 = load i64, i64* %q + %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1 + ret { i64, i64 } %rv2 +} + +define void @call_ret_i64_pair(i64* %i0) { +; SPARC-LABEL: call_ret_i64_pair: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: call ret_i64_pair +; SPARC-NEXT: nop +; SPARC-NEXT: ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: ! kill: def $o2 killed $o2 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: ! kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1 +; SPARC-NEXT: std %o0, [%i0] +; SPARC-NEXT: ! kill: def $o3 killed $o3 killed $o2_o3 def $o2_o3 +; SPARC-NEXT: std %o2, [%i0] +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_pair: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: call ret_i64_pair +; SPARC64-NEXT: nop +; SPARC64-NEXT: stx %o0, [%i0] +; SPARC64-NEXT: stx %o1, [%i0] +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef, + i64* undef, i64* undef) + %e0 = extractvalue { i64, i64 } %rv, 0 + store volatile i64 %e0, i64* %i0 + %e1 = extractvalue { i64, i64 } %rv, 1 + store i64 %e1, i64* %i0 + ret void +} + +;; Functions returning structs more than six registers' worth of space +;; should be automatically treated as an sret function. +declare { [16 x i64] } @ret_i64_arr(i64 %input) + +define i64 @call_ret_i64_arr(i64 %0) { +; SPARC-LABEL: call_ret_i64_arr: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: +; SPARC-NEXT: save %sp, -224, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register %o7, %i7 +; SPARC-NEXT: add %fp, -128, %i2 +; SPARC-NEXT: st %i2, [%sp+64] +; SPARC-NEXT: mov %i0, %o0 +; SPARC-NEXT: mov %i1, %o1 +; SPARC-NEXT: call ret_i64_arr +; SPARC-NEXT: nop +; SPARC-NEXT: unimp 128 +; SPARC-NEXT: ldd [%fp+-8], %i0 +; SPARC-NEXT: restore +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; +; SPARC64-LABEL: call_ret_i64_arr: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: ! %bb.0: +; SPARC64-NEXT: save %sp, -304, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register %o7, %i7 +; SPARC64-NEXT: add %fp, 1919, %o0 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call ret_i64_arr +; SPARC64-NEXT: nop +; SPARC64-NEXT: ldx [%fp+2039], %i0 +; SPARC64-NEXT: restore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = call { [16 x i64] } @ret_i64_arr(i64 %0) + %3 = extractvalue { [16 x i64] } %2, 0 + %4 = extractvalue [16 x i64] %3, 15 + ret i64 %4 +} -- Gitee From 80a9fc840b1b0c5bdd6509578283af3b02782d48 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 26 Oct 2022 16:15:02 -0700 Subject: [PATCH 14/41] [clang][Sema] Fix a clang crash with btf_type_tag For the following program, $ cat t.c struct t { int (__attribute__((btf_type_tag("rcu"))) *f)(); int a; }; int foo(struct t *arg) { return arg->a; } Compiling with 'clang -g -O2 -S t.c' will cause a failure like below: clang: /home/yhs/work/llvm-project/clang/lib/Sema/SemaType.cpp:6391: void {anonymous}::DeclaratorLocFiller::VisitParenTypeLoc(clang::ParenTypeLoc): Assertion `Chunk.Kind == DeclaratorChunk::Paren' failed. PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script. Stack dump: ...... #5 0x00007f89e4280ea5 abort (/lib64/libc.so.6+0x21ea5) #6 0x00007f89e4280d79 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d79) #7 0x00007f89e42a6456 (/lib64/libc.so.6+0x47456) #8 0x00000000045c2596 GetTypeSourceInfoForDeclarator((anonymous namespace)::TypeProcessingState&, clang::QualType, clang::TypeSourceInfo*) SemaType.cpp:0:0 #9 0x00000000045ccfa5 GetFullTypeForDeclarator((anonymous namespace)::TypeProcessingState&, clang::QualType, clang::TypeSourceInfo*) SemaType.cpp:0:0 ...... The reason of the failure is due to the mismatch of TypeLoc and D.getTypeObject().Kind. For example, the TypeLoc is BTFTagAttributedType 0x88614e0 'int btf_type_tag(rcu)()' sugar |-ParenType 0x8861480 'int ()' sugar | `-FunctionNoProtoType 0x8861450 'int ()' cdecl | `-BuiltinType 0x87fd500 'int' while corresponding D.getTypeObject().Kind points to DeclaratorChunk::Paren, and this will cause later assertion. To fix the issue, similar to AttributedTypeLoc, let us skip BTFTagAttributedTypeLoc in GetTypeSourceInfoForDeclarator(). Differential Revision: https://reviews.llvm.org/D136807 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaType.cpp | 3 +++ clang/test/CodeGen/attr-btf_type_tag-func-ptr.c | 15 +++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 clang/test/CodeGen/attr-btf_type_tag-func-ptr.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a17b033f5eb3..a1ee8f0459ae 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -225,6 +225,8 @@ Bug Fixes - Fix a crash when generating code coverage information for an ``if consteval`` statement. This fixes `Issue 57377 `_. +- Fix a crash when a ``btf_type_tag`` attribute is applied to the pointee of + a function pointer. Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 3ab5d26a9a75..edcac4d2ee9a 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6443,6 +6443,9 @@ GetTypeSourceInfoForDeclarator(TypeProcessingState &State, CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc(); } + while (BTFTagAttributedTypeLoc TL = CurrTL.getAs()) + CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc(); + while (DependentAddressSpaceTypeLoc TL = CurrTL.getAs()) { fillDependentAddressSpaceTypeLoc(TL, D.getTypeObject(i).getAttrs()); diff --git a/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c b/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c new file mode 100644 index 000000000000..29ca5f58e4b8 --- /dev/null +++ b/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm -o - %s | FileCheck %s + +struct t { + int (__attribute__((btf_type_tag("rcu"))) *f)(); + int a; +}; +int foo(struct t *arg) { + return arg->a; +} + +// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "f" +// CHECK-SAME: baseType: ![[L18:[0-9]+]] +// CHECK: ![[L18]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[#]], size: [[#]], annotations: ![[L21:[0-9]+]]) +// CHECK: ![[L21]] = !{![[L22:[0-9]+]]} +// CHECK: ![[L22]] = !{!"btf_type_tag", !"rcu"} -- Gitee From 5c68a1cb123161b54b72ce90e7975d95a8eaf2a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 26 Sep 2022 23:07:49 -0400 Subject: [PATCH 15/41] AMDGPU: Make various vector undefs legal Surprisingly these were getting legalized to something zero initialized. This fixes an infinite loop when combining some vector types. Also fixes zero initializing some undef values. SimplifyDemandedVectorElts / SimplifyDemandedBits are not checking for the legality of the output undefs they are replacing unused operations with. This resulted in turning vectors into undefs that were later re-legalized back into zero vectors. (cherry picked from commit 7a84624079a2656c684bed6100708544500c5a32) --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 + llvm/test/CodeGen/AMDGPU/commute-shifts.ll | 16 -- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 4 +- .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 10 +- .../CodeGen/AMDGPU/extract-subvector-16bit.ll | 78 +------ llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 14 +- llvm/test/CodeGen/AMDGPU/select-undef.ll | 219 +++++++++++++++++- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 113 ++------- llvm/test/CodeGen/AMDGPU/v1024.ll | 2 + .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 144 ++---------- llvm/test/CodeGen/AMDGPU/wqm.ll | 162 ++----------- 11 files changed, 306 insertions(+), 458 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f7d139adc63b..f6b7d1ffc6d2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -249,6 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, case ISD::STORE: case ISD::BUILD_VECTOR: case ISD::BITCAST: + case ISD::UNDEF: case ISD::EXTRACT_VECTOR_ELT: case ISD::INSERT_VECTOR_ELT: case ISD::EXTRACT_SUBVECTOR: @@ -516,6 +517,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, case ISD::STORE: case ISD::BUILD_VECTOR: case ISD::BITCAST: + case ISD::UNDEF: case ISD::EXTRACT_VECTOR_ELT: case ISD::INSERT_VECTOR_ELT: case ISD::INSERT_SUBVECTOR: diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll index 8df85ba872bf..3697946cb5c3 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll +++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll @@ -5,14 +5,6 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 { ; SI-LABEL: main: ; SI: ; %bb.0: ; %bb -; SI-NEXT: s_mov_b32 s0, 0 -; SI-NEXT: s_mov_b32 s1, s0 -; SI-NEXT: s_mov_b32 s2, s0 -; SI-NEXT: s_mov_b32 s3, s0 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s0 -; SI-NEXT: s_mov_b32 s6, s0 -; SI-NEXT: s_mov_b32 s7, s0 ; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; SI-NEXT: v_cvt_i32_f32_e32 v0, v0 ; SI-NEXT: v_and_b32_e32 v0, 7, v0 @@ -26,14 +18,6 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 { ; ; VI-LABEL: main: ; VI: ; %bb.0: ; %bb -; VI-NEXT: s_mov_b32 s0, 0 -; VI-NEXT: s_mov_b32 s1, s0 -; VI-NEXT: s_mov_b32 s2, s0 -; VI-NEXT: s_mov_b32 s3, s0 -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s5, s0 -; VI-NEXT: s_mov_b32 s6, s0 -; VI-NEXT: s_mov_b32 s7, s0 ; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; VI-NEXT: v_cvt_i32_f32_e32 v0, v0 ; VI-NEXT: v_and_b32_e32 v0, 7, v0 diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 29fc098899ee..5d985850446c 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -213,7 +213,7 @@ if.else: ; preds = %entry br label %if.end if.end: ; preds = %if.else, %if.then - %call6.sink = phi <3 x i16> [ %call6, %if.else ], [ undef, %if.then ] + %call6.sink = phi <3 x i16> [ %call6, %if.else ], [ zeroinitializer, %if.then ] store <3 x i16> %call6.sink, <3 x i16> addrspace(1)* undef ret void } @@ -266,7 +266,7 @@ if.else: ; preds = %entry br label %if.end if.end: ; preds = %if.else, %if.then - %call6.sink = phi <3 x half> [ %call6, %if.else ], [ undef, %if.then ] + %call6.sink = phi <3 x half> [ %call6, %if.else ], [ zeroinitializer, %if.then ] store <3 x half> %call6.sink, <3 x half> addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll index 8af7575f03d0..0b629efffbb3 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll @@ -4,16 +4,8 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GCN-LABEL: _amdgpu_ps_main: ; GCN: ; %bb.0: ; %.entry -; GCN-NEXT: s_mov_b32 s0, 0 -; GCN-NEXT: v_mov_b32_e32 v4, 0 -; GCN-NEXT: s_mov_b32 s1, s0 -; GCN-NEXT: s_mov_b32 s2, s0 -; GCN-NEXT: s_mov_b32 s3, s0 -; GCN-NEXT: s_mov_b32 s4, s0 -; GCN-NEXT: s_mov_b32 s5, s0 -; GCN-NEXT: s_mov_b32 s6, s0 -; GCN-NEXT: s_mov_b32 s7, s0 ; GCN-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D +; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_clause 0x1 ; GCN-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll index 6456c87a31fb..cbfd8ec5cb16 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll @@ -100,14 +100,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16(<8 x i16> addrspace(1) * %p0, <8 x i16 ; GFX9-NEXT: s_cbranch_execz .LBB0_3 ; GFX9-NEXT: s_branch .LBB0_4 ; GFX9-NEXT: .LBB0_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: .LBB0_3: ; %T ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -244,14 +237,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(<8 x i16> addrspace(1) * %p0, <8 x i ; GFX9-NEXT: s_cbranch_execz .LBB1_3 ; GFX9-NEXT: s_branch .LBB1_4 ; GFX9-NEXT: .LBB1_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: .LBB1_3: ; %T ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -386,14 +372,7 @@ define <4 x half> @vec_8xf16_extract_4xf16(<8 x half> addrspace(1) * %p0, <8 x h ; GFX9-NEXT: s_cbranch_execz .LBB2_3 ; GFX9-NEXT: s_branch .LBB2_4 ; GFX9-NEXT: .LBB2_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: .LBB2_3: ; %T ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -567,22 +546,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16(<16 x i16> addrspace(1) * %p0, <16 x ; GFX9-NEXT: s_cbranch_execz .LBB3_3 ; GFX9-NEXT: s_branch .LBB3_4 ; GFX9-NEXT: .LBB3_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s8 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-NEXT: v_mov_b32_e32 v9, s13 -; GFX9-NEXT: v_mov_b32_e32 v10, s14 -; GFX9-NEXT: v_mov_b32_e32 v11, s15 +; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB3_3: ; %T ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc @@ -759,22 +723,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(<16 x i16> addrspace(1) * %p0, <16 ; GFX9-NEXT: s_cbranch_execz .LBB4_3 ; GFX9-NEXT: s_branch .LBB4_4 ; GFX9-NEXT: .LBB4_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s8 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-NEXT: v_mov_b32_e32 v9, s13 -; GFX9-NEXT: v_mov_b32_e32 v10, s14 -; GFX9-NEXT: v_mov_b32_e32 v11, s15 +; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB4_3: ; %T ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc @@ -949,22 +898,7 @@ define <4 x half> @vec_16xf16_extract_4xf16(<16 x half> addrspace(1) * %p0, <16 ; GFX9-NEXT: s_cbranch_execz .LBB5_3 ; GFX9-NEXT: s_branch .LBB5_4 ; GFX9-NEXT: .LBB5_2: -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s9, s8 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s8 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s8 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-NEXT: v_mov_b32_e32 v9, s13 -; GFX9-NEXT: v_mov_b32_e32 v10, s14 -; GFX9-NEXT: v_mov_b32_e32 v11, s15 +; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB5_3: ; %T ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index cc4ece6c7059..f742d2c0bda4 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -374,18 +374,10 @@ define <4 x float> @insertelement_to_sgpr() nounwind { ; GCN-LABEL: insertelement_to_sgpr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s12, 0 -; GCN-NEXT: s_mov_b32 s4, s12 -; GCN-NEXT: s_mov_b32 s5, s12 -; GCN-NEXT: s_mov_b32 s6, s12 -; GCN-NEXT: s_mov_b32 s7, s12 -; GCN-NEXT: s_mov_b32 s8, s12 -; GCN-NEXT: s_mov_b32 s9, s12 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s12 -; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[4:11], s[12:15] dmask:0x1 +; GCN-NEXT: s_mov_b32 s4, 0 +; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[4:11], s[4:7] dmask:0x1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %tmp = load <4 x i32>, <4 x i32> addrspace(4)* undef diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll index 6597d6784e0c..f02cd3fc5e4e 100644 --- a/llvm/test/CodeGen/AMDGPU/select-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}select_undef_lhs: ; GCN: s_waitcnt @@ -43,3 +43,220 @@ define void @select_undef_n2(float addrspace(1)* %a, i32 %c) { } declare float @llvm.amdgcn.rcp.f32(float) + + +; Make sure the vector undef isn't lowered into 0s. +; GCN-LABEL: {{^}}undef_v6f32: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v6f32(<6 x float> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <6 x float> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <6 x float>, <6 x float> addrspace(3)* undef + %add = fadd <6 x float> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <6 x float> %add, <6 x float> addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}undef_v6i32: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v6i32(<6 x i32> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <6 x i32> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <6 x i32>, <6 x i32> addrspace(3)* undef + %add = add <6 x i32> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <6 x i32> %add, <6 x i32> addrspace(3)* undef + ret void +} + +; Make sure the vector undef isn't lowered into 0s. +; GCN-LABEL: {{^}}undef_v5f32: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v5f32(<5 x float> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <5 x float> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <5 x float>, <5 x float> addrspace(3)* undef + %add = fadd <5 x float> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <5 x float> %add, <5 x float> addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}undef_v5i32: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v5i32(<5 x i32> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <5 x i32> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <5 x i32>, <5 x i32> addrspace(3)* undef + %add = add <5 x i32> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <5 x i32> %add, <5 x i32> addrspace(3)* undef + ret void +} + +; Make sure the vector undef isn't lowered into 0s. +; GCN-LABEL: {{^}}undef_v3f64: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v3f64(<3 x double> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <3 x double> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <3 x double>, <3 x double> addrspace(3)* %ptr + %add = fadd <3 x double> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <3 x double> %add, <3 x double> addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}undef_v3i64: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v3i64(<3 x i64> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <3 x i64> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <3 x i64>, <3 x i64> addrspace(3)* %ptr + %add = add <3 x i64> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <3 x i64> %add, <3 x i64> addrspace(3)* %ptr + ret void +} + +; Make sure the vector undef isn't lowered into 0s. +; GCN-LABEL: {{^}}undef_v4f16: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v4f16(<4 x half> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <4 x half> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <4 x half>, <4 x half> addrspace(3)* %ptr + %add = fadd <4 x half> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <4 x half> %add, <4 x half> addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}undef_v4i16: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v4i16(<4 x i16> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <4 x i16> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <4 x i16>, <4 x i16> addrspace(3)* %ptr + %add = add <4 x i16> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <4 x i16> %add, <4 x i16> addrspace(3)* %ptr + ret void +} + +; Make sure the vector undef isn't lowered into 0s. +; GCN-LABEL: {{^}}undef_v2f16: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v2f16(<2 x half> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <2 x half> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <2 x half>, <2 x half> addrspace(3)* %ptr + %add = fadd <2 x half> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <2 x half> %add, <2 x half> addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}undef_v2i16: +; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0 +; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0 +; GCN: s_cbranch_vccnz +define amdgpu_kernel void @undef_v2i16(<2 x i16> addrspace(3)* %ptr, i1 %cond) { +entry: + br label %loop + +loop: + %phi = phi <2 x i16> [ undef, %entry ], [ %add, %loop ] + %load = load volatile <2 x i16>, <2 x i16> addrspace(3)* %ptr + %add = add <2 x i16> %load, %phi + br i1 %cond, label %loop, label %ret + +ret: + store volatile <2 x i16> %add, <2 x i16> addrspace(3)* %ptr + ret void +} + +; We were expanding undef vectors into zero vectors. Optimizations +; would then see we used no elements of the vector, and reform the +; undef vector resulting in a combiner loop. +; GCN-LABEL: {{^}}inf_loop_undef_vector: +; GCN: s_waitcnt +; GCN-NEXT: v_mad_u64_u32 +; GCN-NEXT: v_mul_lo_u32 +; GCN-NEXT: v_mul_lo_u32 +; GCN-NEXT: v_add3_u32 +; GCN-NEXT: global_store_dwordx2 +define void @inf_loop_undef_vector(<6 x float> %arg, float %arg1, i64 %arg2) { + %i = insertelement <6 x float> %arg, float %arg1, i64 2 + %i3 = bitcast <6 x float> %i to <3 x i64> + %i4 = extractelement <3 x i64> %i3, i64 0 + %i5 = extractelement <3 x i64> %i3, i64 1 + %i6 = mul i64 %i5, %arg2 + %i7 = add i64 %i6, %i4 + store volatile i64 %i7, i64 addrspace(1)* undef, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index ada6c1da04e2..7080c84f7b50 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1397,28 +1397,20 @@ bb7: ; preds = %bb4 define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { ; SI-LABEL: if_after_kill_block: ; SI: ; %bb.0: ; %bb -; SI-NEXT: s_mov_b64 s[2:3], exec +; SI-NEXT: s_mov_b64 s[0:1], exec ; SI-NEXT: s_wqm_b64 exec, exec -; SI-NEXT: s_mov_b32 s0, 0 ; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 -; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc -; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc +; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] ; SI-NEXT: s_cbranch_execz .LBB13_3 ; SI-NEXT: ; %bb.1: ; %bb3 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 -; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc +; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc ; SI-NEXT: s_cbranch_scc0 .LBB13_6 ; SI-NEXT: ; %bb.2: ; %bb3 ; SI-NEXT: s_andn2_b64 exec, exec, vcc ; SI-NEXT: .LBB13_3: ; %bb4 -; SI-NEXT: s_or_b64 exec, exec, s[4:5] -; SI-NEXT: s_mov_b32 s1, s0 -; SI-NEXT: s_mov_b32 s2, s0 -; SI-NEXT: s_mov_b32 s3, s0 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s0 -; SI-NEXT: s_mov_b32 s6, s0 -; SI-NEXT: s_mov_b32 s7, s0 +; SI-NEXT: s_or_b64 exec, exec, s[2:3] ; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 @@ -1439,28 +1431,20 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, ; ; GFX10-WAVE64-LABEL: if_after_kill_block: ; GFX10-WAVE64: ; %bb.0: ; %bb -; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec +; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec ; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec ; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 -; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0 -; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 -; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc +; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc ; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4 -; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s3, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s4, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s5, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s6, s0 -; GFX10-WAVE64-NEXT: s_mov_b32 s7, s0 +; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 @@ -1479,28 +1463,20 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, ; ; GFX10-WAVE32-LABEL: if_after_kill_block: ; GFX10-WAVE32: ; %bb.0: ; %bb -; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo +; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1 -; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0 -; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo -; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2 +; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0 -; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo +; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo ; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4 -; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s3, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s4, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s5, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s6, s0 -; GFX10-WAVE32-NEXT: s_mov_b32 s7, s0 +; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 @@ -1519,29 +1495,22 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, ; ; GFX11-LABEL: if_after_kill_block: ; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_mov_b64 s[2:3], exec +; GFX11-NEXT: s_mov_b64 s[0:1], exec ; GFX11-NEXT: s_wqm_b64 exec, exec -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b64 s[4:5], exec +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b64 s[2:3], exec ; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1 -; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] ; GFX11-NEXT: s_cbranch_execz .LBB13_3 ; GFX11-NEXT: ; %bb.1: ; %bb3 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 -; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc +; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc ; GFX11-NEXT: s_cbranch_scc0 .LBB13_6 ; GFX11-NEXT: ; %bb.2: ; %bb3 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc ; GFX11-NEXT: .LBB13_3: ; %bb4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX11-NEXT: s_mov_b32 s1, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: s_mov_b32 s4, s0 -; GFX11-NEXT: s_mov_b32 s5, s0 -; GFX11-NEXT: s_mov_b32 s6, s0 -; GFX11-NEXT: s_mov_b32 s7, s0 +; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D ; GFX11-NEXT: s_mov_b64 s[0:1], exec ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -1584,19 +1553,11 @@ bb9: ; preds = %bb4 define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; SI-LABEL: cbranch_kill: ; SI: ; %bb.0: ; %.entry -; SI-NEXT: s_mov_b32 s4, 0 ; SI-NEXT: s_mov_b64 s[0:1], exec ; SI-NEXT: v_mov_b32_e32 v4, 0 ; SI-NEXT: v_mov_b32_e32 v2, v1 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_mov_b32 s5, s4 -; SI-NEXT: s_mov_b32 s6, s4 -; SI-NEXT: s_mov_b32 s7, s4 -; SI-NEXT: s_mov_b32 s8, s4 -; SI-NEXT: s_mov_b32 s9, s4 -; SI-NEXT: s_mov_b32 s10, s4 -; SI-NEXT: s_mov_b32 s11, s4 -; SI-NEXT: image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da +; SI-NEXT: image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -1627,16 +1588,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX10-WAVE64-LABEL: cbranch_kill: ; GFX10-WAVE64: ; %bb.0: ; %.entry ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec -; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s6, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s7, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s8, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4 -; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4 -; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -1667,16 +1620,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX10-WAVE32-LABEL: cbranch_kill: ; GFX10-WAVE32: ; %bb.0: ; %.entry ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo -; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s6, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s7, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s8, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4 -; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4 -; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo @@ -1707,16 +1652,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX11-LABEL: cbranch_kill: ; GFX11: ; %bb.0: ; %.entry ; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b64 s[0:1], exec -; GFX11-NEXT: s_mov_b32 s5, s4 -; GFX11-NEXT: s_mov_b32 s6, s4 -; GFX11-NEXT: s_mov_b32 s7, s4 -; GFX11-NEXT: s_mov_b32 s8, s4 -; GFX11-NEXT: s_mov_b32 s9, s4 -; GFX11-NEXT: s_mov_b32 s10, s4 -; GFX11-NEXT: s_mov_b32 s11, s4 -; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX11-NEXT: s_mov_b64 s[2:3], exec ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll index a5e0454a3634..1326ba437f94 100644 --- a/llvm/test/CodeGen/AMDGPU/v1024.ll +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -10,6 +10,7 @@ define amdgpu_kernel void @test_v1024() { entry: %alloca = alloca <32 x i32>, align 16, addrspace(5) %cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %cast, i8 0, i32 128, i1 false) br i1 undef, label %if.then.i.i, label %if.else.i if.then.i.i: ; preds = %entry @@ -24,6 +25,7 @@ if.then.i62.i: ; preds = %if.else.i, %if.then ret void } +declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture readonly, i8, i32, i1 immarg) declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 5164b072a6dd..ed0de729dafd 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -14,7 +14,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: v_mov_b32_e32 v36, v16 @@ -22,13 +21,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: v_mov_b32_e32 v34, v14 ; GFX9-NEXT: v_mov_b32_e32 v33, v13 ; GFX9-NEXT: v_mov_b32_e32 v32, v12 -; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_mov_b32 s6, s4 -; GFX9-NEXT: s_mov_b32 s7, s4 -; GFX9-NEXT: s_mov_b32 s8, s4 -; GFX9-NEXT: s_mov_b32 s9, s4 -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s4 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -82,16 +74,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: v_mov_b32_e32 v34, v14 ; GFX10-NEXT: v_mov_b32_e32 v33, v13 ; GFX10-NEXT: v_mov_b32_e32 v32, v12 -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_mov_b32 s5, s4 -; GFX10-NEXT: s_mov_b32 s6, s4 -; GFX10-NEXT: s_mov_b32 s7, s4 -; GFX10-NEXT: s_mov_b32 s8, s4 -; GFX10-NEXT: s_mov_b32 s9, s4 -; GFX10-NEXT: s_mov_b32 s10, s4 -; GFX10-NEXT: s_mov_b32 s11, s4 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -145,16 +129,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 ; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13 ; GFX11-NEXT: v_mov_b32_e32 v32, v12 -; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: v_writelane_b32 v40, s33, 2 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_mov_b32 s1, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: s_mov_b32 s4, s0 -; GFX11-NEXT: s_mov_b32 s5, s0 -; GFX11-NEXT: s_mov_b32 s6, s0 -; GFX11-NEXT: s_mov_b32 s7, s0 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12 ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 @@ -225,65 +201,41 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 10 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s36, 2 -; GFX9-NEXT: v_writelane_b32 v40, s37, 3 -; GFX9-NEXT: v_writelane_b32 v40, s38, 4 -; GFX9-NEXT: v_writelane_b32 v40, s39, 5 -; GFX9-NEXT: v_writelane_b32 v40, s40, 6 -; GFX9-NEXT: v_writelane_b32 v40, s41, 7 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: v_writelane_b32 v40, s42, 8 -; GFX9-NEXT: s_mov_b32 s36, 0 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: v_writelane_b32 v40, s43, 9 ; GFX9-NEXT: v_mov_b32_e32 v45, v16 ; GFX9-NEXT: v_mov_b32_e32 v44, v15 ; GFX9-NEXT: v_mov_b32_e32 v43, v14 ; GFX9-NEXT: v_mov_b32_e32 v42, v13 ; GFX9-NEXT: v_mov_b32_e32 v41, v12 -; GFX9-NEXT: s_mov_b32 s37, s36 -; GFX9-NEXT: s_mov_b32 s38, s36 -; GFX9-NEXT: s_mov_b32 s39, s36 -; GFX9-NEXT: s_mov_b32 s40, s36 -; GFX9-NEXT: s_mov_b32 s41, s36 -; GFX9-NEXT: s_mov_b32 s42, s36 -; GFX9-NEXT: s_mov_b32 s43, s36 -; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1 +; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1 +; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s43, v40, 9 -; GFX9-NEXT: v_readlane_b32 s42, v40, 8 -; GFX9-NEXT: v_readlane_b32 s41, v40, 7 -; GFX9-NEXT: v_readlane_b32 s40, v40, 6 -; GFX9-NEXT: v_readlane_b32 s39, v40, 5 -; GFX9-NEXT: v_readlane_b32 s38, v40, 4 -; GFX9-NEXT: v_readlane_b32 s37, v40, 3 -; GFX9-NEXT: v_readlane_b32 s36, v40, 2 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 10 +; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -298,66 +250,42 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v41, v16 ; GFX10-NEXT: v_mov_b32_e32 v42, v15 ; GFX10-NEXT: v_mov_b32_e32 v43, v14 -; GFX10-NEXT: v_mov_b32_e32 v44, v13 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v44, v13 ; GFX10-NEXT: v_mov_b32_e32 v45, v12 -; GFX10-NEXT: v_writelane_b32 v40, s36, 2 -; GFX10-NEXT: s_mov_b32 s36, 0 -; GFX10-NEXT: v_writelane_b32 v40, s37, 3 -; GFX10-NEXT: s_mov_b32 s37, s36 -; GFX10-NEXT: v_writelane_b32 v40, s38, 4 -; GFX10-NEXT: s_mov_b32 s38, s36 -; GFX10-NEXT: v_writelane_b32 v40, s39, 5 -; GFX10-NEXT: s_mov_b32 s39, s36 -; GFX10-NEXT: v_writelane_b32 v40, s40, 6 -; GFX10-NEXT: s_mov_b32 s40, s36 -; GFX10-NEXT: v_writelane_b32 v40, s41, 7 -; GFX10-NEXT: s_mov_b32 s41, s36 -; GFX10-NEXT: v_writelane_b32 v40, s42, 8 -; GFX10-NEXT: s_mov_b32 s42, s36 -; GFX10-NEXT: v_writelane_b32 v40, s43, 9 -; GFX10-NEXT: s_mov_b32 s43, s36 -; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_clause 0x4 ; GFX10-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 -; GFX10-NEXT: v_readlane_b32 s43, v40, 9 -; GFX10-NEXT: v_readlane_b32 s42, v40, 8 -; GFX10-NEXT: v_readlane_b32 s41, v40, 7 -; GFX10-NEXT: v_readlane_b32 s40, v40, 6 -; GFX10-NEXT: v_readlane_b32 s39, v40, 5 -; GFX10-NEXT: v_readlane_b32 s38, v40, 4 -; GFX10-NEXT: v_readlane_b32 s37, v40, 3 -; GFX10-NEXT: v_readlane_b32 s36, v40, 2 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 10 +; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -372,7 +300,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:20 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 10 +; GFX11-NEXT: v_writelane_b32 v40, s33, 2 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:16 @@ -380,56 +308,32 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:8 ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:4 ; GFX11-NEXT: scratch_store_b32 off, v45, s33 +; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX11-NEXT: s_add_i32 s32, s32, 32 +; GFX11-NEXT: s_getpc_b64 s[0:1] +; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 +; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v41, v16 :: v_dual_mov_b32 v42, v15 ; GFX11-NEXT: v_dual_mov_b32 v43, v14 :: v_dual_mov_b32 v44, v13 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v45, v12 -; GFX11-NEXT: v_writelane_b32 v40, s36, 2 -; GFX11-NEXT: s_mov_b32 s36, 0 -; GFX11-NEXT: v_writelane_b32 v40, s37, 3 -; GFX11-NEXT: s_mov_b32 s37, s36 -; GFX11-NEXT: v_writelane_b32 v40, s38, 4 -; GFX11-NEXT: s_mov_b32 s38, s36 -; GFX11-NEXT: v_writelane_b32 v40, s39, 5 -; GFX11-NEXT: s_mov_b32 s39, s36 -; GFX11-NEXT: v_writelane_b32 v40, s40, 6 -; GFX11-NEXT: s_mov_b32 s40, s36 -; GFX11-NEXT: v_writelane_b32 v40, s41, 7 -; GFX11-NEXT: s_mov_b32 s41, s36 -; GFX11-NEXT: v_writelane_b32 v40, s42, 8 -; GFX11-NEXT: s_mov_b32 s42, s36 -; GFX11-NEXT: v_writelane_b32 v40, s43, 9 -; GFX11-NEXT: s_mov_b32 s43, s36 -; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_load_b32 v45, off, s33 ; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:4 ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:8 ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:12 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:16 -; GFX11-NEXT: v_readlane_b32 s43, v40, 9 -; GFX11-NEXT: v_readlane_b32 s42, v40, 8 -; GFX11-NEXT: v_readlane_b32 s41, v40, 7 -; GFX11-NEXT: v_readlane_b32 s40, v40, 6 -; GFX11-NEXT: v_readlane_b32 s39, v40, 5 -; GFX11-NEXT: v_readlane_b32 s38, v40, 4 -; GFX11-NEXT: v_readlane_b32 s37, v40, 3 -; GFX11-NEXT: v_readlane_b32 s36, v40, 2 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0 -; GFX11-NEXT: v_readlane_b32 s33, v40, 10 +; GFX11-NEXT: v_readlane_b32 s33, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:20 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index dc85462631d4..16c30174657a 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -1833,87 +1833,54 @@ main_body: define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { ; GFX9-W64-LABEL: test_loop_vcc: ; GFX9-W64: ; %bb.0: ; %entry -; GFX9-W64-NEXT: s_mov_b64 s[8:9], exec +; GFX9-W64-NEXT: s_mov_b64 s[0:1], exec ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v7, v3 ; GFX9-W64-NEXT: v_mov_b32_e32 v6, v2 ; GFX9-W64-NEXT: v_mov_b32_e32 v5, v1 ; GFX9-W64-NEXT: v_mov_b32_e32 v4, v0 -; GFX9-W64-NEXT: s_and_b64 exec, exec, s[8:9] -; GFX9-W64-NEXT: s_mov_b32 s0, 0 -; GFX9-W64-NEXT: s_mov_b32 s1, s0 -; GFX9-W64-NEXT: s_mov_b32 s2, s0 -; GFX9-W64-NEXT: s_mov_b32 s3, s0 -; GFX9-W64-NEXT: s_mov_b32 s4, s0 -; GFX9-W64-NEXT: s_mov_b32 s5, s0 -; GFX9-W64-NEXT: s_mov_b32 s6, s0 -; GFX9-W64-NEXT: s_mov_b32 s7, s0 +; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-W64-NEXT: image_store v[4:7], v0, s[0:7] dmask:0xf unorm ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-W64-NEXT: s_mov_b32 s10, 0x40e00000 +; GFX9-W64-NEXT: s_mov_b32 s4, 0x40e00000 ; GFX9-W64-NEXT: s_branch .LBB31_2 ; GFX9-W64-NEXT: .LBB31_1: ; %body ; GFX9-W64-NEXT: ; in Loop: Header=BB31_2 Depth=1 -; GFX9-W64-NEXT: s_mov_b32 s1, s0 -; GFX9-W64-NEXT: s_mov_b32 s2, s0 -; GFX9-W64-NEXT: s_mov_b32 s3, s0 -; GFX9-W64-NEXT: s_mov_b32 s4, s0 -; GFX9-W64-NEXT: s_mov_b32 s5, s0 -; GFX9-W64-NEXT: s_mov_b32 s6, s0 -; GFX9-W64-NEXT: s_mov_b32 s7, s0 ; GFX9-W64-NEXT: image_sample v[4:7], v0, s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: v_add_f32_e32 v8, 2.0, v8 -; GFX9-W64-NEXT: s_mov_b64 s[2:3], 0 ; GFX9-W64-NEXT: s_cbranch_execz .LBB31_4 ; GFX9-W64-NEXT: .LBB31_2: ; %loop ; GFX9-W64-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-W64-NEXT: v_cmp_lt_f32_e32 vcc, s10, v8 +; GFX9-W64-NEXT: v_cmp_lt_f32_e32 vcc, s4, v8 ; GFX9-W64-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-W64-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-W64-NEXT: v_mov_b32_e32 v3, v7 ; GFX9-W64-NEXT: s_cbranch_vccz .LBB31_1 ; GFX9-W64-NEXT: ; %bb.3: -; GFX9-W64-NEXT: s_mov_b64 s[2:3], -1 ; GFX9-W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-W64-NEXT: ; implicit-def: $vgpr8 ; GFX9-W64-NEXT: .LBB31_4: ; %break -; GFX9-W64-NEXT: s_and_b64 exec, exec, s[8:9] +; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: ; return to shader part epilog ; ; GFX10-W32-LABEL: test_loop_vcc: ; GFX10-W32: ; %bb.0: ; %entry -; GFX10-W32-NEXT: s_mov_b32 s8, exec_lo +; GFX10-W32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v8, 0 -; GFX10-W32-NEXT: s_mov_b32 s0, 0 -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s8 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 +; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: image_store v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: s_branch .LBB31_2 ; GFX10-W32-NEXT: .p2align 6 ; GFX10-W32-NEXT: .LBB31_1: ; %body ; GFX10-W32-NEXT: ; in Loop: Header=BB31_2 Depth=1 -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 -; GFX10-W32-NEXT: v_add_f32_e32 v8, 2.0, v8 ; GFX10-W32-NEXT: image_sample v[0:3], v4, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D -; GFX10-W32-NEXT: s_mov_b32 s1, 0 +; GFX10-W32-NEXT: v_add_f32_e32 v8, 2.0, v8 ; GFX10-W32-NEXT: s_cbranch_execz .LBB31_4 ; GFX10-W32-NEXT: .LBB31_2: ; %loop ; GFX10-W32-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1925,11 +1892,10 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { ; GFX10-W32-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-W32-NEXT: s_cbranch_vccz .LBB31_1 ; GFX10-W32-NEXT: ; %bb.3: -; GFX10-W32-NEXT: s_mov_b32 s1, -1 ; GFX10-W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-W32-NEXT: ; implicit-def: $vgpr8 ; GFX10-W32-NEXT: .LBB31_4: ; %break -; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s8 +; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-W32-NEXT: v_mov_b32_e32 v1, v5 @@ -1999,14 +1965,6 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { ; GFX9-W64-NEXT: v_lshl_add_u32 v0, v2, 2, v0 ; GFX9-W64-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] -; GFX9-W64-NEXT: s_mov_b32 s0, 0 -; GFX9-W64-NEXT: s_mov_b32 s1, s0 -; GFX9-W64-NEXT: s_mov_b32 s2, s0 -; GFX9-W64-NEXT: s_mov_b32 s3, s0 -; GFX9-W64-NEXT: s_mov_b32 s4, s0 -; GFX9-W64-NEXT: s_mov_b32 s5, s0 -; GFX9-W64-NEXT: s_mov_b32 s6, s0 -; GFX9-W64-NEXT: s_mov_b32 s7, s0 ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) @@ -2035,14 +1993,6 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: buffer_load_dword v0, v2, s[8:11], 0 offen ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s0 -; GFX10-W32-NEXT: s_mov_b32 s0, 0 -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) @@ -2079,18 +2029,10 @@ entry: define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { ; GFX9-W64-LABEL: test_nonvoid_return: ; GFX9-W64: ; %bb.0: -; GFX9-W64-NEXT: s_mov_b32 s0, 0 -; GFX9-W64-NEXT: s_mov_b64 s[8:9], exec -; GFX9-W64-NEXT: s_mov_b32 s1, s0 -; GFX9-W64-NEXT: s_mov_b32 s2, s0 -; GFX9-W64-NEXT: s_mov_b32 s3, s0 -; GFX9-W64-NEXT: s_mov_b32 s4, s0 -; GFX9-W64-NEXT: s_mov_b32 s5, s0 -; GFX9-W64-NEXT: s_mov_b32 s6, s0 -; GFX9-W64-NEXT: s_mov_b32 s7, s0 +; GFX9-W64-NEXT: s_mov_b64 s[0:1], exec ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 -; GFX9-W64-NEXT: s_and_b64 exec, exec, s[8:9] +; GFX9-W64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) @@ -2098,18 +2040,10 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { ; ; GFX10-W32-LABEL: test_nonvoid_return: ; GFX10-W32: ; %bb.0: -; GFX10-W32-NEXT: s_mov_b32 s0, 0 -; GFX10-W32-NEXT: s_mov_b32 s8, exec_lo -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 +; GFX10-W32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D -; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s8 +; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) @@ -2128,20 +2062,11 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind { ; GFX9-W64-LABEL: test_nonvoid_return_unreachable: ; GFX9-W64: ; %bb.0: ; %entry -; GFX9-W64-NEXT: s_mov_b32 s4, 0 -; GFX9-W64-NEXT: s_mov_b64 s[2:3], exec -; GFX9-W64-NEXT: s_mov_b32 s5, s4 -; GFX9-W64-NEXT: s_mov_b32 s6, s4 -; GFX9-W64-NEXT: s_mov_b32 s7, s4 -; GFX9-W64-NEXT: s_mov_b32 s8, s4 -; GFX9-W64-NEXT: s_mov_b32 s9, s4 -; GFX9-W64-NEXT: s_mov_b32 s10, s4 -; GFX9-W64-NEXT: s_mov_b32 s11, s4 ; GFX9-W64-NEXT: s_wqm_b64 exec, exec -; GFX9-W64-NEXT: image_sample v0, v0, s[4:11], s[0:3] dmask:0x1 -; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] +; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 +; GFX9-W64-NEXT: s_and_b64 exec, exec, exec ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) -; GFX9-W64-NEXT: image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf +; GFX9-W64-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: s_cmp_lt_i32 s0, 1 ; GFX9-W64-NEXT: s_cbranch_scc0 .LBB34_2 ; GFX9-W64-NEXT: ; %bb.1: ; %else @@ -2155,20 +2080,11 @@ define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) noun ; ; GFX10-W32-LABEL: test_nonvoid_return_unreachable: ; GFX10-W32: ; %bb.0: ; %entry -; GFX10-W32-NEXT: s_mov_b32 s4, 0 -; GFX10-W32-NEXT: s_mov_b32 s1, exec_lo -; GFX10-W32-NEXT: s_mov_b32 s5, s4 -; GFX10-W32-NEXT: s_mov_b32 s6, s4 -; GFX10-W32-NEXT: s_mov_b32 s7, s4 -; GFX10-W32-NEXT: s_mov_b32 s8, s4 -; GFX10-W32-NEXT: s_mov_b32 s9, s4 -; GFX10-W32-NEXT: s_mov_b32 s10, s4 -; GFX10-W32-NEXT: s_mov_b32 s11, s4 ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo -; GFX10-W32-NEXT: image_sample v0, v0, s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D -; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s1 +; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, exec_lo ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -; GFX10-W32-NEXT: image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: s_cmp_lt_i32 s0, 1 ; GFX10-W32-NEXT: s_cbranch_scc0 .LBB34_2 ; GFX10-W32-NEXT: ; %bb.1: ; %else @@ -2215,33 +2131,17 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 { ; GFX9-W64-NEXT: s_cmp_lt_i32 s0, 1 ; GFX9-W64-NEXT: s_cbranch_scc0 .LBB35_2 ; GFX9-W64-NEXT: ; %bb.1: ; %else -; GFX9-W64-NEXT: s_mov_b32 s4, 0 ; GFX9-W64-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-W64-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-W64-NEXT: s_mov_b32 s5, s4 -; GFX9-W64-NEXT: s_mov_b32 s6, s4 -; GFX9-W64-NEXT: s_mov_b32 s7, s4 -; GFX9-W64-NEXT: s_mov_b32 s8, s4 -; GFX9-W64-NEXT: s_mov_b32 s9, s4 -; GFX9-W64-NEXT: s_mov_b32 s10, s4 -; GFX9-W64-NEXT: s_mov_b32 s11, s4 -; GFX9-W64-NEXT: image_sample v[0:3], v[0:1], s[4:11], s[0:3] dmask:0xf +; GFX9-W64-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: s_cbranch_execz .LBB35_3 ; GFX9-W64-NEXT: s_branch .LBB35_4 ; GFX9-W64-NEXT: .LBB35_2: ; GFX9-W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-W64-NEXT: .LBB35_3: ; %if -; GFX9-W64-NEXT: s_mov_b32 s4, 0 -; GFX9-W64-NEXT: s_mov_b32 s5, s4 -; GFX9-W64-NEXT: s_mov_b32 s6, s4 -; GFX9-W64-NEXT: s_mov_b32 s7, s4 -; GFX9-W64-NEXT: s_mov_b32 s8, s4 -; GFX9-W64-NEXT: s_mov_b32 s9, s4 -; GFX9-W64-NEXT: s_mov_b32 s10, s4 -; GFX9-W64-NEXT: s_mov_b32 s11, s4 ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-W64-NEXT: image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf +; GFX9-W64-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf ; GFX9-W64-NEXT: .LBB35_4: ; %end ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: v_mov_b32_e32 v5, 1.0 @@ -2252,21 +2152,13 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 { ; GFX10-W32-LABEL: test_scc: ; GFX10-W32: ; %bb.0: ; %main_body ; GFX10-W32-NEXT: v_mov_b32_e32 v4, v0 -; GFX10-W32-NEXT: s_mov_b32 s8, exec_lo +; GFX10-W32-NEXT: s_mov_b32 s1, exec_lo ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: s_cmp_lt_i32 s0, 1 ; GFX10-W32-NEXT: s_cbranch_scc0 .LBB35_2 ; GFX10-W32-NEXT: ; %bb.1: ; %else ; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-W32-NEXT: v_mov_b32_e32 v1, 1 -; GFX10-W32-NEXT: s_mov_b32 s0, 0 -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 ; GFX10-W32-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-W32-NEXT: s_cbranch_execz .LBB35_3 ; GFX10-W32-NEXT: s_branch .LBB35_4 @@ -2275,17 +2167,9 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 { ; GFX10-W32-NEXT: .LBB35_3: ; %if ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-W32-NEXT: s_mov_b32 s0, 0 -; GFX10-W32-NEXT: s_mov_b32 s1, s0 -; GFX10-W32-NEXT: s_mov_b32 s2, s0 -; GFX10-W32-NEXT: s_mov_b32 s3, s0 -; GFX10-W32-NEXT: s_mov_b32 s4, s0 -; GFX10-W32-NEXT: s_mov_b32 s5, s0 -; GFX10-W32-NEXT: s_mov_b32 s6, s0 -; GFX10-W32-NEXT: s_mov_b32 s7, s0 ; GFX10-W32-NEXT: image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-W32-NEXT: .LBB35_4: ; %end -; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s8 +; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s1 ; GFX10-W32-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-W32-NEXT: buffer_store_dword v5, v4, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) -- Gitee From c8e7a87b1ed6b00dc2c3543a80c892d5948f8849 Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 8 Nov 2022 01:36:43 +0000 Subject: [PATCH 16/41] [CMake] Fix -Wstrict-prototypes Fixes warnings (or errors, if someone injects -Werror in their build system, which happens in fact with some folks vendoring LLVM too) with Clang 16: ``` +/var/tmp/portage.notmp/portage/sys-devel/llvm-15.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: warning: a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes] -/var/tmp/portage.notmp/portage/sys-devel/llvm-14.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes] int main() {return 0;} ^ void ``` Differential Revision: https://reviews.llvm.org/D137503 (cherry picked from commit 32a2af44e1e882f13d1cc2817f0a8d4d8b375d4d) --- .../cmake/Modules/CompilerRTDarwinUtils.cmake | 2 +- compiler-rt/cmake/config-ix.cmake | 2 +- compiler-rt/lib/builtins/CMakeLists.txt | 2 +- libcxx/cmake/config-ix.cmake | 2 +- libcxxabi/cmake/config-ix.cmake | 2 +- libunwind/cmake/config-ix.cmake | 2 +- lldb/tools/debugserver/source/CMakeLists.txt | 2 +- llvm/cmake/config-ix.cmake | 2 +- llvm/cmake/modules/FindFFI.cmake | 2 +- llvm/cmake/modules/FindTerminfo.cmake | 2 +- llvm/cmake/modules/FindZ3.cmake | 3 ++- llvm/cmake/modules/HandleLLVMOptions.cmake | 2 +- openmp/runtime/cmake/config-ix.cmake | 2 +- polly/lib/External/CMakeLists.txt | 24 +++++++++---------- 14 files changed, 26 insertions(+), 25 deletions(-) diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake index 2c9983c6a1ae..640c7e7124c9 100644 --- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake @@ -116,7 +116,7 @@ function(darwin_test_archs os valid_archs) if(NOT TEST_COMPILE_ONLY) message(STATUS "Finding valid architectures for ${os}...") set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c) - file(WRITE ${SIMPLE_C} "#include \nint main() { printf(__FILE__); return 0; }\n") + file(WRITE ${SIMPLE_C} "#include \nint main(void) { printf(__FILE__); return 0; }\n") set(os_linker_flags) foreach(flag ${DARWIN_${os}_LINK_FLAGS}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index cd45176cf2ba..9077e8f9ffe0 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -209,7 +209,7 @@ set(COMPILER_RT_SUPPORTED_ARCH) # runtime libraries supported by our current compilers cross-compiling # abilities. set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc) -file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main() { printf(\"hello, world\"); }\n") +file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main(void) { printf(\"hello, world\"); }\n") # Detect whether the current target platform is 32-bit or 64-bit, and setup # the correct commandline flags needed to attempt to target 32-bit and 64-bit. diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index ec668e294d6d..df02682ae00f 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -745,7 +745,7 @@ else () SOURCE "#if !(__ARM_FP & 0x8) #error No double-precision support! #endif - int main() { return 0; }") + int main(void) { return 0; }") if(NOT COMPILER_RT_HAS_${arch}_VFP_DP) list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES}) endif() diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index 209e6214a471..e65d32072852 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -94,7 +94,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake index 079cabf25da5..fa16a108e98a 100644 --- a/libcxxabi/cmake/config-ix.cmake +++ b/libcxxabi/cmake/config-ix.cmake @@ -77,7 +77,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index c9b65b3cefc0..1b027cf37213 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -85,7 +85,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index f636e387bf1f..c6e7e8cf49e8 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -95,7 +95,7 @@ check_c_source_compiles( #else #error Not building for ARM64 #endif - int main() { return 0; } + int main(void) { return 0; } " BUILDING_FOR_ARM64_OSX ) diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 83512760d8dd..7e657fd1532d 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -71,7 +71,7 @@ if(APPLE) CHECK_C_SOURCE_COMPILES(" static const char *__crashreporter_info__ = 0; asm(\".desc ___crashreporter_info__, 0x10\"); - int main() { return 0; }" + int main(void) { return 0; }" HAVE_CRASHREPORTER_INFO) endif() diff --git a/llvm/cmake/modules/FindFFI.cmake b/llvm/cmake/modules/FindFFI.cmake index b0d859af8959..a493a89d6301 100644 --- a/llvm/cmake/modules/FindFFI.cmake +++ b/llvm/cmake/modules/FindFFI.cmake @@ -45,7 +45,7 @@ if(FFI_LIBRARIES) struct ffi_cif; typedef struct ffi_cif ffi_cif; void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); - int main() { ffi_call(0, 0, 0, 0); }" + int main(void) { ffi_call(0, 0, 0, 0); }" HAVE_FFI_CALL) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindTerminfo.cmake b/llvm/cmake/modules/FindTerminfo.cmake index 65edb80fa69a..eef1f95853eb 100644 --- a/llvm/cmake/modules/FindTerminfo.cmake +++ b/llvm/cmake/modules/FindTerminfo.cmake @@ -20,7 +20,7 @@ if(Terminfo_LIBRARIES) list(APPEND CMAKE_REQUIRED_LIBRARIES ${Terminfo_LIBRARIES}) check_c_source_compiles(" int setupterm(char *term, int filedes, int *errret); - int main() { return setupterm(0, 0, 0); }" + int main(void) { return setupterm(0, 0, 0); }" Terminfo_LINKABLE) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindZ3.cmake b/llvm/cmake/modules/FindZ3.cmake index 118b1eac3b32..6fb56d74184d 100644 --- a/llvm/cmake/modules/FindZ3.cmake +++ b/llvm/cmake/modules/FindZ3.cmake @@ -18,8 +18,9 @@ function(check_z3_version z3_include z3_lib) # The program that will be executed to print Z3's version. file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp "#include + #include #include - int main() { + int main(void) { unsigned int major, minor, build, rev; Z3_get_version(&major, &minor, &build, &rev); printf(\"%u.%u.%u\", major, minor, build); diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 56d05f5b5fce..0fca934be9cf 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -779,7 +779,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) # line is also a // comment. set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment") - CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}" + CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main(void) {return 0;}" C_WCOMMENT_ALLOWS_LINE_WRAP) set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index 775c58f98484..bd7585545b72 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -27,7 +27,7 @@ function(libomp_check_version_symbols retval) void func2() { printf(\"World\"); } __asm__(\".symver func1, func@VER1\"); __asm__(\".symver func2, func@VER2\"); - int main() { + int main(void) { func1(); func2(); return 0; diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index 2f912e7daeb2..c0a5b32e283f 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -64,7 +64,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" ${_includes} ${_type} typeVar; - int main() { + int main(void) { return 0; } " ${_variable}) @@ -73,7 +73,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" int func(void) __attribute__((__warn_unused_result__)); - int main() { return 0; } + int main(void) { return 0; } " HAS_ATTRIBUTE_WARN_UNUSED_RESULT) set(GCC_WARN_UNUSED_RESULT) if (HAS_ATTRIBUTE_WARN_UNUSED_RESULT) @@ -82,22 +82,22 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" __attribute__ ((unused)) static void foo(void); - int main() { return 0; } + int main(void) { return 0; } " HAVE___ATTRIBUTE__) check_c_source_compiles_numeric(" #include - int main() { (void)ffs(0); return 0; } + int main(void) { (void)ffs(0); return 0; } " HAVE_DECL_FFS) check_c_source_compiles_numeric(" - int main() { (void)__builtin_ffs(0); return 0; } + int main(void) { (void)__builtin_ffs(0); return 0; } " HAVE_DECL___BUILTIN_FFS) check_c_source_compiles_numeric(" #include - int main() { (void)_BitScanForward(NULL, 0); return 0; } + int main(void) { (void)_BitScanForward(NULL, 0); return 0; } " HAVE_DECL__BITSCANFORWARD) if (NOT HAVE_DECL_FFS AND @@ -109,12 +109,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strcasecmp(\"\", \"\"); return 0; } + int main(void) { (void)strcasecmp(\"\", \"\"); return 0; } " HAVE_DECL_STRCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_stricmp(\"\", \"\"); return 0; } + int main(void) { (void)_stricmp(\"\", \"\"); return 0; } " HAVE_DECL__STRICMP) if (NOT HAVE_DECL_STRCASECMP AND NOT HAVE_DECL__STRICMP) @@ -124,12 +124,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strncasecmp(\"\", \"\", 0); return 0; } + int main(void) { (void)strncasecmp(\"\", \"\", 0); return 0; } " HAVE_DECL_STRNCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_strnicmp(\"\", \"\", 0); return 0; } + int main(void) { (void)_strnicmp(\"\", \"\", 0); return 0; } " HAVE_DECL__STRNICMP) if (NOT HAVE_DECL_STRNCASECMP AND NOT HAVE_DECL__STRNICMP) @@ -139,12 +139,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL_SNPRINTF) check_c_source_compiles_numeric(" #include - int main() { _snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { _snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL__SNPRINTF) if (NOT HAVE_DECL_SNPRINTF AND NOT HAVE_DECL__SNPRINTF) -- Gitee From 931b6d51d84e2a5cbbdc925d546819d4d3b7c63e Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 2 Nov 2022 07:56:43 -0400 Subject: [PATCH 17/41] Reenable POSIX builtin library functions in gnu2x mode gnu17 and earlier modes automatically expose several POSIX C APIs, and this was accidentally disabled for gnu2x in 7d644e1215b376ec5e915df9ea2eeb56e2d94626. This restores the behavior for gnu2x mode (without changing the behavior in C standards modes instead of GNU modes). Fixes #56607 --- clang/docs/ReleaseNotes.rst | 4 ++++ clang/lib/Sema/SemaLookup.cpp | 8 +++----- clang/test/Sema/gnu-builtins.c | 13 +++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 clang/test/Sema/gnu-builtins.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a1ee8f0459ae..13cca2ebbfb8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -227,6 +227,10 @@ Bug Fixes `Issue 57377 `_. - Fix a crash when a ``btf_type_tag`` attribute is applied to the pointee of a function pointer. +- Clang 14 predeclared some builtin POSIX library functions in ``gnu2x`` mode, + and Clang 15 accidentally stopped predeclaring those functions in that + language mode. Clang 16 now predeclares those functions again. This fixes + `Issue 56607 `_. Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 68158ec977cf..5d0d87fd2422 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -939,11 +939,9 @@ bool Sema::LookupBuiltin(LookupResult &R) { // If this is a builtin on this (or all) targets, create the decl. if (unsigned BuiltinID = II->getBuiltinID()) { - // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any - // predefined library functions like 'malloc'. Instead, we'll just - // error. - if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL || - getLangOpts().C2x) && + // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined + // library functions like 'malloc'. Instead, we'll just error. + if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return false; diff --git a/clang/test/Sema/gnu-builtins.c b/clang/test/Sema/gnu-builtins.c new file mode 100644 index 000000000000..c4da8b39363c --- /dev/null +++ b/clang/test/Sema/gnu-builtins.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu2x %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c2x %s + +// std-no-diagnostics + +// 'index' is a builtin library function, but only in GNU mode. So this should +// give an error in GNU modes but be okay in non-GNU mode. +// FIXME: the error is correct, but these notes are pretty awful. +int index; // gnu-error {{redefinition of 'index' as different kind of symbol}} \ + gnu-note {{unguarded header; consider using #ifdef guards or #pragma once}} \ + gnu-note {{previous definition is here}} -- Gitee From 58ba50a52edeff3cae9cfa3bdd0ee000873ffef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Mon, 7 Nov 2022 14:46:58 +0100 Subject: [PATCH 18/41] [cmake] Add missing CMakePushCheckState include to FindLibEdit.cmake Add the missing include to fix an error when `cmake_push_check_state()` is called and incidentally the CMakePushCheckState module is not loaded by any other check running prior to `FindLibEdit.cmake`: CMake Error at /var/no-tmpfs/portage/dev-util/lldb-15.0.4/work/cmake/Modules/FindLibEdit.cmake:24 (cmake_push_check_state): Unknown CMake command "cmake_push_check_state". Call Stack (most recent call first): cmake/modules/LLDBConfig.cmake:52 (find_package) cmake/modules/LLDBConfig.cmake:59 (add_optional_dependency) CMakeLists.txt:28 (include) Gentoo Bug: https://bugs.gentoo.org/880065 Differential Revision: https://reviews.llvm.org/D137555 (cherry picked from commit 3676a86a4322e8c2b9c541f057b5d3704146b8f3) --- cmake/Modules/FindLibEdit.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Modules/FindLibEdit.cmake b/cmake/Modules/FindLibEdit.cmake index 7e62d4d839ae..de8f5a2e7101 100644 --- a/cmake/Modules/FindLibEdit.cmake +++ b/cmake/Modules/FindLibEdit.cmake @@ -21,6 +21,7 @@ find_library(LibEdit_LIBRARIES NAMES edit HINTS ${PC_LIBEDIT_LIBRARY_DIRS}) include(CheckIncludeFile) if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h") + include(CMakePushCheckState) cmake_push_check_state() list(APPEND CMAKE_REQUIRED_INCLUDES ${LibEdit_INCLUDE_DIRS}) list(APPEND CMAKE_REQUIRED_LIBRARIES ${LibEdit_LIBRARIES}) -- Gitee From 6750e341b076d10a336c662ed6916500fdb20105 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Wed, 9 Nov 2022 05:07:44 +0800 Subject: [PATCH 19/41] [TypePromotion] Replace Zext to Truncate for the case src bitwidth is larger Fix: https://github.com/llvm/llvm-project/issues/58843 Reviewed By: samtebbs Differential Revision: https://reviews.llvm.org/D137613 (cherry picked from commit 597f44409236bf7fa933a4ce18af23772e28fb43) --- llvm/lib/CodeGen/TypePromotion.cpp | 8 +++++++- .../TypePromotion/AArch64/pr58843.ll | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 8dc8d381ad16..a63118067139 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -569,7 +569,8 @@ void IRPromoter::TruncateSinks() { void IRPromoter::Cleanup() { LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc - // operands, so remove them + // operands, so remove them. + // Some zexts need to be replaced with truncate if src bitwidth is larger. for (auto *V : Visited) { if (!isa(V)) continue; @@ -584,6 +585,11 @@ void IRPromoter::Cleanup() { << "\n"); ReplaceAllUsersOfWith(ZExt, Src); continue; + } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) { + IRBuilder<> Builder{ZExt}; + Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy()); + ReplaceAllUsersOfWith(ZExt, Trunc); + continue; } // We've inserted a trunc for a zext sink, but we already know that the diff --git a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll new file mode 100644 index 000000000000..983a32029c65 --- /dev/null +++ b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +; Check the case don't crash due to zext source type bitwidth +; larger than dest type bitwidth. +define i1 @test(i8 %arg) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[EXT1]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ext1 = zext i8 %arg to i64 + %trunc = trunc i64 %ext1 to i3 + %ext2 = zext i3 %trunc to i8 + %cmp = icmp ne i8 %ext2, 0 + ret i1 %cmp +} -- Gitee From d75ae21044ad893572855cefb0c0898a771b2094 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 19:50:20 +0100 Subject: [PATCH 20/41] Set LLVM_ATOMIC_LIB variable for convenient linking against libatomic * Set LLVM_ATOMIC_LIB to keep track of when we need to link against libatomic. * Add detection of mold linker which is required for this. * Use --as-needed when linking against libatomic as a bonus. On some platforms, libatomic may be required only sometimes. Bug: https://bugs.gentoo.org/832675 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136280 (cherry picked from commit fa981b541365190ae646d2dce575706cd0626cf7) --- llvm/cmake/modules/AddLLVM.cmake | 1 + llvm/cmake/modules/CheckAtomic.cmake | 13 +++++++++++++ llvm/lib/Support/CMakeLists.txt | 4 +--- llvm/tools/dsymutil/CMakeLists.txt | 4 +--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 057431208322..1f0507f395cf 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -212,6 +212,7 @@ if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32) else() if("${stdout}" MATCHES "^mold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") + set(LLVM_LINKER_IS_MOLD YES CACHE INTERNAL "") message(STATUS "Linker detection: mold") elseif("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") diff --git a/llvm/cmake/modules/CheckAtomic.cmake b/llvm/cmake/modules/CheckAtomic.cmake index 3c5ba72993a3..f11cadf39ff6 100644 --- a/llvm/cmake/modules/CheckAtomic.cmake +++ b/llvm/cmake/modules/CheckAtomic.cmake @@ -82,6 +82,19 @@ elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CMAKE_CXX_COMPILER_ID MATCHES "XL") endif() endif() +# Set variable LLVM_ATOMIC_LIB specifying flags for linking against libatomic. +if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) + # Use options --push-state, --as-needed and --pop-state if linker is known to support them. + # Use single option -Wl of compiler driver to avoid incorrect re-ordering of options by CMake. + if(LLVM_LINKER_IS_GNULD OR LLVM_LINKER_IS_GOLD OR LLVM_LINKER_IS_LLD OR LLVM_LINKER_IS_MOLD) + set(LLVM_ATOMIC_LIB "-Wl,--push-state,--as-needed,-latomic,--pop-state") + else() + set(LLVM_ATOMIC_LIB "-latomic") + endif() +else() + set(LLVM_ATOMIC_LIB) +endif() + ## TODO: This define is only used for the legacy atomic operations in ## llvm's Atomic.h, which should be replaced. Other code simply ## assumes C++11 works. diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 806cbc884cc5..ff23ec74df96 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -59,9 +59,7 @@ elseif( CMAKE_HOST_UNIX ) if( LLVM_ENABLE_TERMINFO ) set(imported_libs ${imported_libs} Terminfo::terminfo) endif() - if( LLVM_ENABLE_THREADS AND (HAVE_LIBATOMIC OR HAVE_CXX_LIBATOMICS64) ) - set(system_libs ${system_libs} atomic) - endif() + set(system_libs ${system_libs} ${LLVM_ATOMIC_LIB}) set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB}) if( UNIX AND NOT (BEOS OR HAIKU) ) set(system_libs ${system_libs} m) diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index a255c1c5daf5..38028cd3d80a 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -40,6 +40,4 @@ if(APPLE) target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation") endif(APPLE) -if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) - target_link_libraries(dsymutil PRIVATE atomic) -endif() +target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) -- Gitee From 4c3d83810ad7bde70b2665df9a15947695e92adb Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 20:09:34 +0100 Subject: [PATCH 21/41] Link liblldCOFF against libatomic when necessary Also simplify code for liblldCommon using the new LLVM_ATOMIC_LIB variable. Depends on D136280. Bug: https://bugs.gentoo.org/832675 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136281 (cherry picked from commit f0b451c77f14947e3e7d314f048679fa2f5c6298) --- lld/COFF/CMakeLists.txt | 1 + lld/Common/CMakeLists.txt | 9 ++------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index d289bd591034..55aec26854c8 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -44,6 +44,7 @@ add_lld_library(lldCOFF LINK_LIBS lldCommon ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS COFFOptionsTableGen diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 1ae7da1f5f7f..9c23ed395223 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -1,9 +1,3 @@ -set(LLD_SYSTEM_LIBS ${LLVM_PTHREAD_LIB}) - -if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) - list(APPEND LLD_SYSTEM_LIBS atomic) -endif() - find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -54,7 +48,8 @@ add_lld_library(lldCommon Target LINK_LIBS - ${LLD_SYSTEM_LIBS} + ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS intrinsics_gen -- Gitee From 0988addf2680b3717be47fd6f2493f33fe886f90 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 20:12:10 +0100 Subject: [PATCH 22/41] Link libclangBasic against libatomic when necessary. This is necessary at least on PPC32. Depends on D136280. Bug: https://bugs.gentoo.org/874024 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136282 (cherry picked from commit 20132d8eaa68a6c53e152718beda1dc0f4c9ff6c) --- clang/CMakeLists.txt | 1 + clang/lib/Basic/CMakeLists.txt | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 13d76e7fd935..e3bc4b468fb6 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -117,6 +117,7 @@ if(CLANG_BUILT_STANDALONE) include(TableGen) include(HandleLLVMOptions) include(VersionFromVCS) + include(CheckAtomic) include(GetErrcMessages) include(LLVMDistributionSupport) diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 3e052c0cf995..c38c9fddb424 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -110,3 +110,7 @@ add_clang_library(clangBasic omp_gen ) +target_link_libraries(clangBasic + PRIVATE + ${LLVM_ATOMIC_LIB} +) -- Gitee From 11c3a21f8d1ba21c7744ba9d272c26c2ce3c58a0 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Thu, 13 Oct 2022 08:41:31 +0200 Subject: [PATCH 23/41] [analyzer] Workaround crash on encountering Class non-type template parameters The Clang Static Analyzer will crash on this code: ```lang=C++ struct Box { int value; }; template int get() { return V.value; } template int get(); ``` https://godbolt.org/z/5Yb1sMMMb The problem is that we don't account for encountering `TemplateParamObjectDecl`s within the `DeclRefExpr` handler in the `ExprEngine`. IMO we should create a new memregion for representing such template param objects, to model their language semantics. Such as: - it should have global static storage - for two identical values, their addresses should be identical as well http://eel.is/c%2B%2Bdraft/temp.param#8 I was thinking of introducing a `TemplateParamObjectRegion` under `DeclRegion` for this purpose. It could have `TemplateParamObjectDecl` as a field. The `TemplateParamObjectDecl::getValue()` returns `APValue`, which might represent multiple levels of structures, unions and other goodies - making the transformation from `APValue` to `SVal` a bit complicated. That being said, for now, I think having `Unknowns` for such cases is definitely an improvement to crashing, hence I'm proposing this patch. Reviewed By: xazax.hun Differential Revision: https://reviews.llvm.org/D135763 (cherry picked from commit b062ee7dc4515b0a42157717105839627d5542bb) --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 6 ++++ .../test/Analysis/template-param-objects.cpp | 33 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 clang/test/Analysis/template-param-objects.cpp diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 19149d079822..ab65612bce90 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2839,6 +2839,12 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D, return; } + if (const auto *TPO = dyn_cast(D)) { + // FIXME: We should meaningfully implement this. + (void)TPO; + return; + } + llvm_unreachable("Support for this Decl not implemented."); } diff --git a/clang/test/Analysis/template-param-objects.cpp b/clang/test/Analysis/template-param-objects.cpp new file mode 100644 index 000000000000..dde95fa62cb6 --- /dev/null +++ b/clang/test/Analysis/template-param-objects.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false -std=c++20 -verify %s + +template void clang_analyzer_dump(T); +void clang_analyzer_eval(bool); + +struct Box { + int value; +}; +bool operator ==(Box lhs, Box rhs) { + return lhs.value == rhs.value; +} +template void dumps() { + clang_analyzer_dump(V); // expected-warning {{lazyCompoundVal}} + clang_analyzer_dump(&V); // expected-warning {{Unknown}} + clang_analyzer_dump(V.value); // expected-warning {{Unknown}} FIXME: It should be '6 S32b'. + clang_analyzer_dump(&V.value); // expected-warning {{Unknown}} +} +template void dumps(); + +// [temp.param].7.3.2: +// "All such template parameters in the program of the same type with the +// same value denote the same template parameter object." +template void stable_addresses() { + clang_analyzer_eval(&A1 == &A2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE. + clang_analyzer_eval(&B1 == &B2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE. + clang_analyzer_eval(&A1 == &B2); // expected-warning {{UNKNOWN}} FIXME: It should be FALSE. + + clang_analyzer_eval(A1 == A2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE. + clang_analyzer_eval(B1 == B2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE. + clang_analyzer_eval(A1 == B2); // expected-warning {{UNKNOWN}} FIXME: It should be FALSE. +} +template void stable_addresses(); -- Gitee From 68799e789fc5f2a5ff25beffa4dc8828780c08fb Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 1 Nov 2022 11:37:10 -0700 Subject: [PATCH 24/41] [GlobalOpt] Don't remove inalloca from varargs functions Varargs and inalloca have a weird interaction where varargs are actually passed via the inalloca alloca. Removing inalloca breaks the varargs because they're still not passed as separate arguments. Fixes #58718 Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D137182 (cherry picked from commit 8c49b01a1ee051ab2c09be4cffc83656ccc0fbe6) --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2 +- .../Transforms/GlobalOpt/inalloca-varargs.ll | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 6df0409256bb..6fc7b29c5b78 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2003,7 +2003,7 @@ OptimizeFunctions(Module &M, // FIXME: We should also hoist alloca affected by this to the entry // block if possible. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) && - !F.hasAddressTaken() && !hasMustTailCallers(&F)) { + !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) { RemoveAttribute(&F, Attribute::InAlloca); Changed = true; } diff --git a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll new file mode 100644 index 000000000000..188210782edd --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -passes=globalopt -S < %s | FileCheck %s + +define i32 @main(ptr %a) { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: (ptr [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ ptr, i32 }>, align 4 +; CHECK-NEXT: store ptr [[A]], ptr [[ARGMEM]], align 8 +; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds <{ ptr, i32 }>, ptr [[ARGMEM]], i32 0, i32 1 +; CHECK-NEXT: store i32 5, ptr [[G0]], align 4 +; CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @i(ptr inalloca(ptr) [[ARGMEM]]) +; CHECK-NEXT: ret i32 [[CALL3]] +; + %argmem = alloca inalloca <{ ptr, i32 }>, align 4 + store ptr %a, ptr %argmem, align 8 + %g0 = getelementptr inbounds <{ ptr, i32 }>, ptr %argmem, i32 0, i32 1 + store i32 5, ptr %g0, align 4 + %call3 = call i32 (ptr, ...) @i(ptr inalloca(ptr) %argmem) + ret i32 %call3 +} + +define internal i32 @i(ptr inalloca(ptr) %a, ...) { +; CHECK-LABEL: define {{[^@]+}}@i +; CHECK-SAME: (ptr inalloca(ptr) [[A:%.*]], ...) unnamed_addr { +; CHECK-NEXT: [[AP:%.*]] = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.va_start(ptr [[AP]]) +; CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[AP]], align 4 +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[ARGP_CUR]], align 4 +; CHECK-NEXT: ret i32 [[L]] +; + %ap = alloca ptr, align 4 + call void @llvm.va_start(ptr %ap) + %argp.cur = load ptr, ptr %ap, align 4 + %l = load i32, ptr %argp.cur, align 4 + ret i32 %l +} + +declare void @llvm.va_start(ptr) -- Gitee From 392963bb1daf7ec8822a0f02929a8ada17eb0a0a Mon Sep 17 00:00:00 2001 From: Jitka Plesnikova Date: Wed, 21 Sep 2022 11:42:46 +0200 Subject: [PATCH 25/41] [lldb] Fix 'error: non-const lvalue...' caused by SWIG 4.1.0 Fix the failure caused by change in SwigValueWraper for C++11 and later for improved move semantics in SWIG commit. https://github.com/swig/swig/commit/d1055f4b3d51cb8060893f8036846ac743302dab (cherry picked from commit f0a25fe0b746f56295d5c02116ba28d2f965c175) --- lldb/bindings/python/python-typemaps.swig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig index bf3de66b91bf..d45431c771ca 100644 --- a/lldb/bindings/python/python-typemaps.swig +++ b/lldb/bindings/python/python-typemaps.swig @@ -435,7 +435,7 @@ template <> bool SetNumberFromPyObject(double &number, PyObject *obj) { %typemap(out) lldb::FileSP { $result = nullptr; - lldb::FileSP &sp = $1; + const lldb::FileSP &sp = $1; if (sp) { PythonFile pyfile = unwrapOrSetPythonException(PythonFile::FromFile(*sp)); if (!pyfile.IsValid()) -- Gitee From dc8f6ffc3bf297098a1dfd3fbce801afbe9f5238 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 29 Sep 2022 21:48:38 +0200 Subject: [PATCH 26/41] [lldb] Get rid of __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS C++11 made the use of these macro obsolete, see https://sourceware.org/bugzilla/show_bug.cgi?id=15366 As a side effect this prevents https://github.com/swig/swig/issues/2193. Differential Revision: https://reviews.llvm.org/D134877 (cherry picked from commit 81fc5f7909a4ef5a8d4b5da2a10f77f7cb01ba63) --- lldb/bindings/CMakeLists.txt | 2 -- lldb/bindings/interfaces.swig | 3 --- 2 files changed, 5 deletions(-) diff --git a/lldb/bindings/CMakeLists.txt b/lldb/bindings/CMakeLists.txt index c8aa0bcf9681..9eed2f1e6299 100644 --- a/lldb/bindings/CMakeLists.txt +++ b/lldb/bindings/CMakeLists.txt @@ -26,8 +26,6 @@ set(SWIG_COMMON_FLAGS -features autodoc -I${LLDB_SOURCE_DIR}/include -I${CMAKE_CURRENT_SOURCE_DIR} - -D__STDC_LIMIT_MACROS - -D__STDC_CONSTANT_MACROS ${DARWIN_EXTRAS} ) diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index c9a6d0f06056..021c7683d170 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -1,8 +1,5 @@ /* Various liblldb typedefs that SWIG needs to know about. */ #define __extension__ /* Undefine GCC keyword to make Swig happy when processing glibc's stdint.h. */ -/* The ISO C99 standard specifies that in C++ implementations limit macros such - as INT32_MAX should only be defined if __STDC_LIMIT_MACROS is. */ -#define __STDC_LIMIT_MACROS %include "stdint.i" %include "lldb/lldb-defines.h" -- Gitee From a399896637584c86acd61afca5bbfe8ed76a7c7d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 13 Nov 2022 22:05:37 +0000 Subject: [PATCH 27/41] [VectorUtils] Skip interleave members with diff type and alloca sizes. Currently, codegen doesn't support cases where the type size doesn't match the alloc size. Skip them for now. Fixes #58722. (cherry picked from commit 758699c39984296f20a4dac44c6892065601c4cd) --- llvm/lib/Analysis/VectorUtils.cpp | 7 +- ...interleave-allocsize-not-equal-typesize.ll | 141 ++++++++++++++++++ 2 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index c4795a80ead2..bc20f33f174c 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1110,6 +1110,12 @@ void InterleavedAccessInfo::collectConstStrideAccesses( continue; Type *ElementTy = getLoadStoreType(&I); + // Currently, codegen doesn't support cases where the type size doesn't + // match the alloc size. Skip them for now. + uint64_t Size = DL.getTypeAllocSize(ElementTy); + if (Size * 8 != DL.getTypeSizeInBits(ElementTy)) + continue; + // We don't check wrapping here because we don't know yet if Ptr will be // part of a full group or a group with gaps. Checking wrapping for all // pointers (even those that end up in groups with no gaps) will be overly @@ -1121,7 +1127,6 @@ void InterleavedAccessInfo::collectConstStrideAccesses( /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - uint64_t Size = DL.getTypeAllocSize(ElementTy); AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, getLoadStoreAlignment(&I)); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll new file mode 100644 index 000000000000..32ca12ee7e0e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Make sure LV does not crash when analyzing potential interleave groups with +; accesses where the typesize doesn't match to allocsize. +define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { +; CHECK-LABEL: @pr58722_load_interleave_group( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[UGLYGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = load i24, ptr [[TMP16]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP21:%.*]] = load i24, ptr [[TMP17]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i24> poison, i24 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i24> [[TMP22]], i24 [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP24:%.*]] = load i24, ptr [[TMP18]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP25:%.*]] = load i24, ptr [[TMP19]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x i24> poison, i24 [[TMP24]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i24> [[TMP26]], i24 [[TMP25]], i32 1 +; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i24> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = zext <2 x i24> [[TMP27]] to <2 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP11]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = add <2 x i32> [[TMP15]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 +; CHECK-NEXT: store <2 x i32> [[TMP30]], ptr [[TMP34]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 2 +; CHECK-NEXT: store <2 x i32> [[TMP31]], ptr [[TMP35]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10001, 10000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP_IV]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_IV]], i64 1 +; CHECK-NEXT: [[V2:%.*]] = load i24, ptr [[GEP]], align 4 +; CHECK-NEXT: [[V2_EXT:%.*]] = zext i24 [[V2]] to i32 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[V1]], [[V2_EXT]] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i32 [[SUM]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 10000 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.iv = getelementptr inbounds i64, ptr %src, i64 %iv + %v1 = load i32, ptr %gep.iv, align 4 + %gep = getelementptr inbounds i32, ptr %gep.iv, i64 1 + %v2 = load i24, ptr %gep, align 4 + %v2.ext = zext i24 %v2 to i32 + %sum = add i32 %v1, %v2.ext + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 %sum, ptr %gep.dst + %iv.next = add i64 %iv, 1 + %cmp = icmp eq i64 %iv, 10000 + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} + +define void @pr58722_store_interleave_group(ptr %src, ptr %dst) { +; CHECK-LABEL: @pr58722_store_interleave_group( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]] +; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_IV]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1 +; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24 +; CHECK-NEXT: store i24 [[TRUNC_IV]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 10000 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep.iv = getelementptr inbounds i64, ptr %src, i32 %iv + store i32 %iv, ptr %gep.iv + %gep = getelementptr inbounds i64, ptr %gep.iv, i64 1 + %trunc.iv = trunc i32 %iv to i24 + store i24 %trunc.iv, ptr %gep + %iv.next = add i32 %iv, 2 + %cmp = icmp eq i32 %iv, 10000 + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} -- Gitee From 154e88af7ec97d9b9f389e55d45bf07108a9a097 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 15 Nov 2022 22:28:29 -0800 Subject: [PATCH 28/41] Bump version to 15.0.5 --- libcxx/include/__config | 2 +- llvm/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++--- .../clang/include/clang/Config/config.h | 2 +- utils/bazel/llvm-project-overlay/lld/BUILD.bazel | 2 +- .../llvm/include/llvm/Config/llvm-config.h | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 810189c94a94..d5ac34eba37a 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -36,7 +36,7 @@ #ifdef __cplusplus -# define _LIBCPP_VERSION 15004 +# define _LIBCPP_VERSION 15005 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 3d6f5e6f9d3d..b101a1a187c9 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 4) + set(LLVM_VERSION_PATCH 5) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 3b890e00bec3..391132f1a316 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 15 llvm_version_minor = 0 -llvm_version_patch = 4 +llvm_version_patch = 5 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 04f6e94535e4..e3c8693c0421 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (15, 0, 4) +__versioninfo__ = (15, 0, 5) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 96b462717be9..8f21799d4888 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -358,11 +358,11 @@ genrule( name = "basic_version_gen", outs = ["include/clang/Basic/Version.inc"], cmd = ( - "echo '#define CLANG_VERSION 15.0.4' >> $@\n" + + "echo '#define CLANG_VERSION 15.0.5' >> $@\n" + "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" + "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" + - "echo '#define CLANG_VERSION_PATCHLEVEL 4' >> $@\n" + - "echo '#define CLANG_VERSION_STRING \"15.0.4\"' >> $@\n" + "echo '#define CLANG_VERSION_PATCHLEVEL 5' >> $@\n" + + "echo '#define CLANG_VERSION_STRING \"15.0.5\"' >> $@\n" ), ) diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index 5d157492eae3..ec15fccc089b 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -93,7 +93,7 @@ /* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 15.0.4" +#define BACKEND_PACKAGE_STRING "LLVM 15.0.5" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index e3a8c98ffa22..b4e7f5ccbdc4 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -13,7 +13,7 @@ package( genrule( name = "config_version_gen", outs = ["include/lld/Common/Version.inc"], - cmd = "echo '#define LLD_VERSION_STRING \"15.0.4\"' > $@", + cmd = "echo '#define LLD_VERSION_STRING \"15.0.5\"' > $@", ) genrule( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index 7a86202e8e0d..16ffcbf0d2f6 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -80,10 +80,10 @@ #define LLVM_VERSION_MINOR 0 /* Patch version of the LLVM API */ -#define LLVM_VERSION_PATCH 4 +#define LLVM_VERSION_PATCH 5 /* LLVM version string */ -#define LLVM_VERSION_STRING "15.0.4" +#define LLVM_VERSION_STRING "15.0.5" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() -- Gitee From 25a36ca5c791611588a455e7e7e8a3593e90f9a3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Nov 2022 10:38:24 -0800 Subject: [PATCH 29/41] Bump version to 15.0.6 --- libcxx/include/__config | 2 +- llvm/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++--- .../clang/include/clang/Config/config.h | 2 +- utils/bazel/llvm-project-overlay/lld/BUILD.bazel | 2 +- .../llvm/include/llvm/Config/llvm-config.h | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index d5ac34eba37a..5f62b974170f 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -36,7 +36,7 @@ #ifdef __cplusplus -# define _LIBCPP_VERSION 15005 +# define _LIBCPP_VERSION 15006 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index b101a1a187c9..28e28cfe7b6a 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 5) + set(LLVM_VERSION_PATCH 6) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 391132f1a316..00e1dc6f1411 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 15 llvm_version_minor = 0 -llvm_version_patch = 5 +llvm_version_patch = 6 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index e3c8693c0421..b34d17b9dc47 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (15, 0, 5) +__versioninfo__ = (15, 0, 6) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 8f21799d4888..5ee87d516519 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -358,11 +358,11 @@ genrule( name = "basic_version_gen", outs = ["include/clang/Basic/Version.inc"], cmd = ( - "echo '#define CLANG_VERSION 15.0.5' >> $@\n" + + "echo '#define CLANG_VERSION 15.0.6' >> $@\n" + "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" + "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" + - "echo '#define CLANG_VERSION_PATCHLEVEL 5' >> $@\n" + - "echo '#define CLANG_VERSION_STRING \"15.0.5\"' >> $@\n" + "echo '#define CLANG_VERSION_PATCHLEVEL 6' >> $@\n" + + "echo '#define CLANG_VERSION_STRING \"15.0.6\"' >> $@\n" ), ) diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index ec15fccc089b..ff4feb2b4af9 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -93,7 +93,7 @@ /* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 15.0.5" +#define BACKEND_PACKAGE_STRING "LLVM 15.0.6" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index b4e7f5ccbdc4..6ba06c794413 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -13,7 +13,7 @@ package( genrule( name = "config_version_gen", outs = ["include/lld/Common/Version.inc"], - cmd = "echo '#define LLD_VERSION_STRING \"15.0.5\"' > $@", + cmd = "echo '#define LLD_VERSION_STRING \"15.0.6\"' > $@", ) genrule( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index 16ffcbf0d2f6..a44e9f60c0e1 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -80,10 +80,10 @@ #define LLVM_VERSION_MINOR 0 /* Patch version of the LLVM API */ -#define LLVM_VERSION_PATCH 5 +#define LLVM_VERSION_PATCH 6 /* LLVM version string */ -#define LLVM_VERSION_STRING "15.0.5" +#define LLVM_VERSION_STRING "15.0.6" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() -- Gitee From e6e61e9b2ef7c0fa3fe2cd7c612e00ecf57a9dd8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 17 Nov 2022 16:07:15 -0800 Subject: [PATCH 30/41] Revert "Reapply: Add an error message to the default SIGPIPE handler" This patch is spamming compiles with unhelpful and confusing messages. E.g. the Linux kernel uses "grep -q" in several places. It's meant to quit with a return code of zero when the first match is found. This can cause a SIGPIPE signal, but that's expected, and there's no way to turn this error message off to avoid spurious error messages. UNIX03 apparently doesn't require printing an error message on SIGPIPE, but specifically when there's an error on the stdout stream in a normal program flow, e.g. when SIGPIPE trap is disabled. A separate patch is planned to address the specific case we care most about (involving llvm-nm). This reverts commit b89bcefa6202e310eb3167dd1c37f1807377ec8d. Link: https://github.com/llvm/llvm-project/issues/59037 Link: https://github.com/ClangBuiltLinux/linux/issues/1651 Differential Revision: https://reviews.llvm.org/D138244 (cherry picked from commit 4787efa38066adb51e2c049499d25b3610c0877b) --- llvm/lib/Support/Unix/Signals.inc | 4 ---- llvm/test/Support/unix03-sigpipe-exit.test | 26 ---------------------- 2 files changed, 30 deletions(-) delete mode 100644 llvm/test/Support/unix03-sigpipe-exit.test diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index bf145bffe8bf..23ac012b9e00 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -432,10 +432,6 @@ void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) { } void llvm::sys::DefaultOneShotPipeSignalHandler() { - // UNIX03 conformance requires a non-zero exit code and an error message - // to stderr when writing to a closed stdout fails. - errs() << "error: write on a pipe with no reader\n"; - // Send a special return code that drivers can check for, from sysexits.h. exit(EX_IOERR); } diff --git a/llvm/test/Support/unix03-sigpipe-exit.test b/llvm/test/Support/unix03-sigpipe-exit.test deleted file mode 100644 index 01680841db00..000000000000 --- a/llvm/test/Support/unix03-sigpipe-exit.test +++ /dev/null @@ -1,26 +0,0 @@ -## Test that when writing to a closed stdout, LLVM tools finish with a non-zero -## exit code and an error message on stderr. The test uses llvm-cxxfilt, but -## it's a logic from the default SIGPIPE handler, so it applies to all the tools. -## This is required for UNIX03 conformance. - -# UNSUPPORTED: system-windows - -# RUN: not %python %s llvm-cxxfilt 2>&1 | FileCheck %s -# CHECK: error: write on a pipe with no reader - -import subprocess -import sys - -with subprocess.Popen([sys.argv[1]], stdout=subprocess.PIPE, stdin=subprocess.PIPE) as process: - process.stdout.close() - - # llvm-cxxfilt with no extra arguments runs interactively and writes input - # to output. Writing continuously to stdin should trigger SIGPIPE when the - # subprocess attempts to write out bytes to a closed stdout. - try: - while True: - process.stdin.write("foo\n".encode("utf-8")) - except BrokenPipeError: - # Clear stdin, pipe is broken and closing it on cleanup will raise an exception. - process.stdin = None -sys.exit(process.returncode) -- Gitee From abcd0341d8465d55d7293dfc7142fef5055047fa Mon Sep 17 00:00:00 2001 From: Brett Werling Date: Wed, 16 Nov 2022 08:16:11 -0800 Subject: [PATCH 31/41] [ELF] Handle GCC collect2 -plugin-opt= on Windows Follows up on commit cd5d5ce235081005173566c99c592550021de058 by additionally ignoring relative paths ending in "lto-wrapper.exe" as can be the case for GCC cross-compiled for Windows. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D138065 (cherry picked from commit cf4f35b78871aecc21e663067c57e60595bd7197) --- lld/ELF/Driver.cpp | 9 ++++++--- lld/test/ELF/lto-plugin-ignore.s | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 296fb4220012..58d863776430 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1330,12 +1330,15 @@ static void readConfigs(opt::InputArgList &args) { parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling()); // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or - // relative path. Just ignore. If not ended with "lto-wrapper", consider it an + // relative path. Just ignore. If not ended with "lto-wrapper" (or + // "lto-wrapper.exe" for GCC cross-compiled for Windows), consider it an // unsupported LLVMgold.so option and error. - for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) - if (!StringRef(arg->getValue()).endswith("lto-wrapper")) + for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) { + StringRef v(arg->getValue()); + if (!v.endswith("lto-wrapper") && !v.endswith("lto-wrapper.exe")) error(arg->getSpelling() + ": unknown plugin option '" + arg->getValue() + "'"); + } config->passPlugins = args::getStrings(args, OPT_load_pass_plugins); diff --git a/lld/test/ELF/lto-plugin-ignore.s b/lld/test/ELF/lto-plugin-ignore.s index 2935bad149de..dd39139b6243 100644 --- a/lld/test/ELF/lto-plugin-ignore.s +++ b/lld/test/ELF/lto-plugin-ignore.s @@ -8,7 +8,9 @@ # RUN: ld.lld %t.o -o /dev/null \ # RUN: -plugin path/to/liblto_plugin.so \ # RUN: -plugin-opt=/path/to/lto-wrapper \ +# RUN: -plugin-opt=/path/to/lto-wrapper.exe \ # RUN: -plugin-opt=relative/path/to/lto-wrapper \ +# RUN: -plugin-opt=relative/path/to/lto-wrapper.exe \ # RUN: -plugin-opt=-fresolution=zed \ # RUN: -plugin-opt=-pass-through=-lgcc \ # RUN: -plugin-opt=-pass-through=-lgcc_eh \ -- Gitee From 088f33605d8a61ff519c580a71b1dd57d16a03f8 Mon Sep 17 00:00:00 2001 From: yronglin Date: Thu, 17 Nov 2022 23:06:21 +0800 Subject: [PATCH 32/41] [CodeGen][ARM] Fix ARMABIInfo::EmitVAAarg crash with empty record type variadic arg Fix ARMABIInfo::EmitVAAarg crash with empty record type variadic arg Open issue: https://github.com/llvm/llvm-project/issues/58794 Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D138137 (cherry picked from commit 80f444646c62ccc8b2399d60ac91e62e6e576da6) --- clang/lib/CodeGen/TargetInfo.cpp | 8 ++++---- clang/test/CodeGen/arm-vaarg.c | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/arm-vaarg.c diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 36e10e4df4c1..44743fa0206f 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -7047,10 +7047,10 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; + VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); + auto *Load = CGF.Builder.CreateLoad(VAListAddr); + Address Addr = Address(Load, CGF.Int8Ty, SlotSize); + return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); } CharUnits TySize = getContext().getTypeSizeInChars(Ty); diff --git a/clang/test/CodeGen/arm-vaarg.c b/clang/test/CodeGen/arm-vaarg.c new file mode 100644 index 000000000000..4dab397a20de --- /dev/null +++ b/clang/test/CodeGen/arm-vaarg.c @@ -0,0 +1,23 @@ +// RUN: %clang -Xclang -no-opaque-pointers -mfloat-abi=soft -target arm-linux-gnu -emit-llvm -S -o - %s | FileCheck %s + +struct Empty {}; + +struct Empty emptyvar; + +void take_args(int a, ...) { +// CHECK: [[ALLOCA_VA_LIST:%[a-zA-Z0-9._]+]] = alloca %struct.__va_list, align 4 +// CHECK: call void @llvm.va_start +// CHECK-NEXT: [[AP_ADDR:%[a-zA-Z0-9._]+]] = bitcast %struct.__va_list* [[ALLOCA_VA_LIST]] to i8** +// CHECK-NEXT: [[LOAD_AP:%[a-zA-Z0-9._]+]] = load i8*, i8** [[AP_ADDR]], align 4 +// CHECK-NEXT: [[EMPTY_PTR:%[a-zA-Z0-9._]+]] = bitcast i8* [[LOAD_AP]] to %struct.Empty* + + // It's conceivable that EMPTY_PTR may not actually be a valid pointer + // (e.g. it's at the very bottom of the stack and the next page is + // invalid). This doesn't matter provided it's never loaded (there's no + // well-defined way to tell), but it becomes a problem if we do try to use it. +// CHECK-NOT: load %struct.Empty, %struct.Empty* [[EMPTY_PTR]] + __builtin_va_list l; + __builtin_va_start(l, a); + emptyvar = __builtin_va_arg(l, struct Empty); + __builtin_va_end(l); +} -- Gitee From a8af9f679231a55b8a0f5707d8727679a98f4b06 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 6 Jan 2023 13:09:17 -0800 Subject: [PATCH 33/41] Bump version to 15.0.7 --- libcxx/include/__config | 2 +- llvm/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++--- .../clang/include/clang/Config/config.h | 2 +- utils/bazel/llvm-project-overlay/lld/BUILD.bazel | 2 +- .../llvm/include/llvm/Config/llvm-config.h | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 5f62b974170f..c97cbae96fba 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -36,7 +36,7 @@ #ifdef __cplusplus -# define _LIBCPP_VERSION 15006 +# define _LIBCPP_VERSION 15007 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 28e28cfe7b6a..db207e3328be 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 6) + set(LLVM_VERSION_PATCH 7) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 00e1dc6f1411..d485a133f921 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 15 llvm_version_minor = 0 -llvm_version_patch = 6 +llvm_version_patch = 7 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index b34d17b9dc47..928d6db8d3cd 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (15, 0, 6) +__versioninfo__ = (15, 0, 7) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 5ee87d516519..59932954c949 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -358,11 +358,11 @@ genrule( name = "basic_version_gen", outs = ["include/clang/Basic/Version.inc"], cmd = ( - "echo '#define CLANG_VERSION 15.0.6' >> $@\n" + + "echo '#define CLANG_VERSION 15.0.7' >> $@\n" + "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" + "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" + - "echo '#define CLANG_VERSION_PATCHLEVEL 6' >> $@\n" + - "echo '#define CLANG_VERSION_STRING \"15.0.6\"' >> $@\n" + "echo '#define CLANG_VERSION_PATCHLEVEL 7' >> $@\n" + + "echo '#define CLANG_VERSION_STRING \"15.0.7\"' >> $@\n" ), ) diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index ff4feb2b4af9..5ee35630d35a 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -93,7 +93,7 @@ /* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 15.0.6" +#define BACKEND_PACKAGE_STRING "LLVM 15.0.7" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index 6ba06c794413..06b57f39e333 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -13,7 +13,7 @@ package( genrule( name = "config_version_gen", outs = ["include/lld/Common/Version.inc"], - cmd = "echo '#define LLD_VERSION_STRING \"15.0.6\"' > $@", + cmd = "echo '#define LLD_VERSION_STRING \"15.0.7\"' > $@", ) genrule( diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index a44e9f60c0e1..1d5c3a4e879b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -80,10 +80,10 @@ #define LLVM_VERSION_MINOR 0 /* Patch version of the LLVM API */ -#define LLVM_VERSION_PATCH 6 +#define LLVM_VERSION_PATCH 7 /* LLVM version string */ -#define LLVM_VERSION_STRING "15.0.6" +#define LLVM_VERSION_STRING "15.0.7" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() -- Gitee From 74d3ba1af5c09b85331c90105c461484762ee3e4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 13 Dec 2022 15:06:29 -0800 Subject: [PATCH 34/41] [X86] Don't zero out %eax if both %al and %ah are used The iterator over super and sub registers doesn't include both 8-bit registers in its list. So if both registers are used and only one of them is live on return, then we need to make sure that the other 8-bit register is also marked as live and not zeroed out. Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D139679 (cherry picked from commit 14d4cddc5506fb0fd3c4ac556b4edd970aa151eb) --- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 8 +- .../CodeGen/X86/zero-call-used-regs-i386.ll | 112 ++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 85d051cfdbe7..a8d40edd88d3 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1237,7 +1237,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { if (!MO.isReg()) continue; - for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg())) + MCRegister Reg = MO.getReg(); + + // This picks up sibling registers (e.q. %al -> %ah). + for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit) + RegsToZero.reset(*Unit); + + for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg)) RegsToZero.reset(SReg); } } diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll new file mode 100644 index 000000000000..33e501ca8503 --- /dev/null +++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386 +; +; Make sure we don't zero out %eax when both %ah and %al are used. +; +; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766 + +%struct.maple_subtree_state = type { ptr } + +@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4 +@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1 +@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1 +@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4 +@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1 + +define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind { +; I386-LABEL: test1: +; I386: # %bb.0: +; I386-NEXT: pushl %ebx +; I386-NEXT: subl $24, %esp +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: movl (%eax), %eax +; I386-NEXT: movzbl (%eax), %ebx +; I386-NEXT: calll bar +; I386-NEXT: testb %al, %al +; I386-NEXT: # implicit-def: $al +; I386-NEXT: # kill: killed $al +; I386-NEXT: je .LBB0_6 +; I386-NEXT: # %bb.1: +; I386-NEXT: cmpl $0, mas_data_end_type +; I386-NEXT: je .LBB0_3 +; I386-NEXT: # %bb.2: +; I386-NEXT: movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax +; I386-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; I386-NEXT: jmp .LBB0_6 +; I386-NEXT: .LBB0_3: +; I386-NEXT: movb mt_pivots_0, %ah +; I386-NEXT: movb %ah, %al +; I386-NEXT: decb %al +; I386-NEXT: movl mas_data_end___trans_tmp_2, %ecx +; I386-NEXT: movsbl %al, %edx +; I386-NEXT: cmpl $0, (%ecx,%edx,4) +; I386-NEXT: je .LBB0_5 +; I386-NEXT: # %bb.4: +; I386-NEXT: movb %al, %ah +; I386-NEXT: .LBB0_5: +; I386-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; I386-NEXT: .LBB0_6: +; I386-NEXT: movb mt_slots_0, %bh +; I386-NEXT: leal {{[0-9]+}}(%esp), %eax +; I386-NEXT: movl %eax, (%esp) +; I386-NEXT: calll baz +; I386-NEXT: subl $4, %esp +; I386-NEXT: cmpb %bh, %bl +; I386-NEXT: jae .LBB0_8 +; I386-NEXT: # %bb.7: +; I386-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; I386-NEXT: movl %eax, (%esp) +; I386-NEXT: calll gaz +; I386-NEXT: .LBB0_8: +; I386-NEXT: movb $1, %al +; I386-NEXT: addl $24, %esp +; I386-NEXT: popl %ebx +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: retl + %2 = alloca %struct.maple_subtree_state, align 4 + %3 = load ptr, ptr %0, align 4 + %4 = load i8, ptr %3, align 1 + %5 = tail call zeroext i1 @bar() + br i1 %5, label %6, label %20 + +6: ; preds = %1 + %7 = load i32, ptr @mas_data_end_type, align 4 + %8 = icmp eq i32 %7, 0 + br i1 %8, label %11, label %9 + +9: ; preds = %6 + %10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1 + br label %20 + +11: ; preds = %6 + %12 = load i8, ptr @mt_pivots_0, align 1 + %13 = add i8 %12, -1 + %14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4 + %15 = sext i8 %13 to i32 + %16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15 + %17 = load i32, ptr %16, align 4 + %18 = icmp eq i32 %17, 0 + %19 = select i1 %18, i8 %12, i8 %13 + br label %20 + +20: ; preds = %11, %9, %1 + %21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ] + %22 = load i8, ptr @mt_slots_0, align 1 + call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2) + %23 = icmp ult i8 %4, %22 + br i1 %23, label %24, label %25 + +24: ; preds = %20 + call void @gaz(i8 noundef signext %21) + br label %25 + +25: ; preds = %20, %24 + ret i1 true +} + +declare dso_local zeroext i1 @bar(...) local_unnamed_addr + +declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr + +declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr -- Gitee From 67fd0d2af4bf9e939ebcccb2b66552a31789af94 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Tue, 3 Jan 2023 18:12:15 +0800 Subject: [PATCH 35/41] [TypePromotion] Add truncate in ConvertTruncs when the original truncate type is not extend type If the src type is not extend type, after convert the truncate to and we need to truncate the and also to make sure the all user is legal. The old fix D137613 doesn't work when the truncate convert to and have the other users. So this time I try to add the truncate after and to avoid all these potential issues. Fix: #59554 Reviewed By: samparker Differential Revision: https://reviews.llvm.org/D140869 (cherry picked from commit a0b470c9848c638e86f97eca53063642e07cea67) --- llvm/lib/CodeGen/TypePromotion.cpp | 8 ++--- .../TypePromotion/AArch64/pr58843.ll | 20 ----------- .../TypePromotion/AArch64/trunc-zext-chain.ll | 36 +++++++++++++++++++ 3 files changed, 38 insertions(+), 26 deletions(-) delete mode 100644 llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index a63118067139..36e3c1245f1c 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -570,7 +570,6 @@ void IRPromoter::Cleanup() { LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc // operands, so remove them. - // Some zexts need to be replaced with truncate if src bitwidth is larger. for (auto *V : Visited) { if (!isa(V)) continue; @@ -585,11 +584,6 @@ void IRPromoter::Cleanup() { << "\n"); ReplaceAllUsersOfWith(ZExt, Src); continue; - } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) { - IRBuilder<> Builder{ZExt}; - Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy()); - ReplaceAllUsersOfWith(ZExt, Trunc); - continue; } // We've inserted a trunc for a zext sink, but we already know that the @@ -626,6 +620,8 @@ void IRPromoter::ConvertTruncs() { ConstantInt *Mask = ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); + if (SrcTy != ExtTy) + Masked = Builder.CreateTrunc(Masked, ExtTy); if (auto *I = dyn_cast(Masked)) NewInsts.insert(I); diff --git a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll deleted file mode 100644 index 983a32029c65..000000000000 --- a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll +++ /dev/null @@ -1,20 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - -; Check the case don't crash due to zext source type bitwidth -; larger than dest type bitwidth. -define i1 @test(i8 %arg) { -; CHECK-LABEL: @test( -; CHECK-NEXT: [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[EXT1]], 7 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: ret i1 [[CMP]] -; - %ext1 = zext i8 %arg to i64 - %trunc = trunc i64 %ext1 to i3 - %ext2 = zext i3 %trunc to i8 - %cmp = icmp ne i8 %ext2, 0 - ret i1 %cmp -} diff --git a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll index 0a846ba115ec..bcf8dfdd7cb0 100644 --- a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll +++ b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll @@ -177,3 +177,39 @@ latch: ; preds = %bb14, %bb9 exit: ret i64 %var30 } + +; Check the case don't crash due to zext source type bitwidth +; larger than dest type bitwidth. +define i1 @pr58843(i8 %arg) { +; CHECK-LABEL: @pr58843( +; CHECK-NEXT: [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[EXT1]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ext1 = zext i8 %arg to i64 + %trunc = trunc i64 %ext1 to i3 + %ext2 = zext i3 %trunc to i8 + %cmp = icmp ne i8 %ext2, 0 + ret i1 %cmp +} + +; Check the case don't crash due to xor two op have different +; types +define i1 @pr59554(i8 %arg) { +; CHECK-LABEL: @pr59554( +; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i8 [[ARG:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[ARG_EXT]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = xor i32 [[TMP2]], 1 +; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[SWITCH_LOBIT]] +; + %arg.ext = zext i8 %arg to i64 + %trunc = trunc i64 %arg.ext to i3 + %switch.tableidx = xor i3 %trunc, 1 + %switch.maskindex = zext i3 %trunc to i8 + %switch.lobit = icmp ne i8 %switch.maskindex, 0 + ret i1 %switch.lobit +} -- Gitee From 1095870e8ceddc5371f446f4e7c3473f89a461cd Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 17 Oct 2022 13:36:19 -0700 Subject: [PATCH 36/41] [wasm-ld] Define a `__heap_end` symbol marking the end of allocated memory. Define a `__heap_end` symbol that marks the end of the memory region that starts at `__heap_base`. This will allow malloc implementations to know how much memory they can use at `__heap_base` even if someone has done a `memory.grow` before they can initialize their state. Differential Revision: https://reviews.llvm.org/D136110 --- lld/test/wasm/export-all.s | 7 +++++-- lld/test/wasm/mutable-global-exports.s | 7 +++++-- lld/wasm/Driver.cpp | 1 + lld/wasm/Symbols.cpp | 1 + lld/wasm/Symbols.h | 11 +++++++---- lld/wasm/Writer.cpp | 16 +++++++++++++--- 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/lld/test/wasm/export-all.s b/lld/test/wasm/export-all.s index 009da9f6a381..5af835ce485e 100644 --- a/lld/test/wasm/export-all.s +++ b/lld/test/wasm/export-all.s @@ -40,9 +40,12 @@ foo: # CHECK-NEXT: - Name: __heap_base # CHECK-NEXT: Kind: GLOBAL # CHECK-NEXT: Index: 4 -# CHECK-NEXT: - Name: __memory_base +# CHECK-NEXT: - Name: __heap_end # CHECK-NEXT: Kind: GLOBAL # CHECK-NEXT: Index: 5 -# CHECK-NEXT: - Name: __table_base +# CHECK-NEXT: - Name: __memory_base # CHECK-NEXT: Kind: GLOBAL # CHECK-NEXT: Index: 6 +# CHECK-NEXT: - Name: __table_base +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: Index: 7 diff --git a/lld/test/wasm/mutable-global-exports.s b/lld/test/wasm/mutable-global-exports.s index e2e45ff93a4b..98009610ac55 100644 --- a/lld/test/wasm/mutable-global-exports.s +++ b/lld/test/wasm/mutable-global-exports.s @@ -79,10 +79,13 @@ _start: # CHECK-ALL-NEXT: - Name: __heap_base # CHECK-ALL-NEXT: Kind: GLOBAL # CHECK-ALL-NEXT: Index: 5 -# CHECK-ALL-NEXT: - Name: __memory_base +# CHECK-ALL-NEXT: - Name: __heap_end # CHECK-ALL-NEXT: Kind: GLOBAL # CHECK-ALL-NEXT: Index: 6 -# CHECK-ALL-NEXT: - Name: __table_base +# CHECK-ALL-NEXT: - Name: __memory_base # CHECK-ALL-NEXT: Kind: GLOBAL # CHECK-ALL-NEXT: Index: 7 +# CHECK-ALL-NEXT: - Name: __table_base +# CHECK-ALL-NEXT: Kind: GLOBAL +# CHECK-ALL-NEXT: Index: 8 # CHECK-ALL-NEXT: - Type: CODE diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 0a0f0c8a05bd..4afbfe24110f 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -681,6 +681,7 @@ static void createOptionalSymbols() { if (!config->isPic) { WasmSym::globalBase = symtab->addOptionalDataSymbol("__global_base"); WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base"); + WasmSym::heapEnd = symtab->addOptionalDataSymbol("__heap_end"); WasmSym::definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base"); WasmSym::definedTableBase = symtab->addOptionalDataSymbol("__table_base"); if (config->is64.value_or(false)) diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index e0670cea6425..a79c5bec3b3b 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -83,6 +83,7 @@ DefinedData *WasmSym::dsoHandle; DefinedData *WasmSym::dataEnd; DefinedData *WasmSym::globalBase; DefinedData *WasmSym::heapBase; +DefinedData *WasmSym::heapEnd; DefinedData *WasmSym::initMemoryFlag; GlobalSymbol *WasmSym::stackPointer; GlobalSymbol *WasmSym::tlsBase; diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index c17b720a90fa..32e75a69c5f8 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -538,11 +538,14 @@ struct WasmSym { // Symbol marking the end of the data and bss. static DefinedData *dataEnd; - // __heap_base - // Symbol marking the end of the data, bss and explicit stack. Any linear - // memory following this address is not used by the linked code and can - // therefore be used as a backing store for brk()/malloc() implementations. + // __heap_base/__heap_end + // Symbols marking the beginning and end of the "heap". It starts at the end + // of the data, bss and explicit stack, and extends to the end of the linear + // memory allocated by wasm-ld. This region of memory is not used by the + // linked code, so it may be used as a backing store for `sbrk` or `malloc` + // implementations. static DefinedData *heapBase; + static DefinedData *heapEnd; // __wasm_init_memory_flag // Symbol whose contents are nonzero iff memory has already been initialized. diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index f98c95526c9e..f6bbaa02b571 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -340,10 +340,20 @@ void Writer::layoutMemory() { Twine(maxMemorySetting)); memoryPtr = config->initialMemory; } - out.memorySec->numMemoryPages = - alignTo(memoryPtr, WasmPageSize) / WasmPageSize; + + memoryPtr = alignTo(memoryPtr, WasmPageSize); + + out.memorySec->numMemoryPages = memoryPtr / WasmPageSize; log("mem: total pages = " + Twine(out.memorySec->numMemoryPages)); + if (WasmSym::heapEnd) { + // Set `__heap_end` to follow the end of the statically allocated linear + // memory. The fact that this comes last means that a malloc/brk + // implementation can grow the heap at runtime. + log("mem: heap end = " + Twine(memoryPtr)); + WasmSym::heapEnd->setVA(memoryPtr); + } + if (config->maxMemory != 0) { if (config->maxMemory != alignTo(config->maxMemory, WasmPageSize)) error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned"); @@ -363,7 +373,7 @@ void Writer::layoutMemory() { if (config->isPic) max = maxMemorySetting; else - max = alignTo(memoryPtr, WasmPageSize); + max = memoryPtr; } out.memorySec->maxMemoryPages = max / WasmPageSize; log("mem: max pages = " + Twine(out.memorySec->maxMemoryPages)); -- Gitee From 948cadd6d4247f7a5bd8fd6b1386062653d54c84 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 3 Jan 2023 16:04:45 -0800 Subject: [PATCH 37/41] [RegAllocFast] Handle new debug values for spills These new debug values get inserted after the place where the spill happens, which means they won't be reached by the reverse traversal of basic block instructions. This would crash or fail assertions if they contained any virtual registers to be replaced. We can manually handle the new debug values right away to resolve this. Fixes https://github.com/llvm/llvm-project/issues/59172 Reviewed By: StephenTozer Differential Revision: https://reviews.llvm.org/D139590 (cherry picked from commit 87f57f459e7acbb00a6ca4ee6dec6014c5a97e07) --- llvm/lib/CodeGen/RegAllocFast.cpp | 3 + .../PowerPC/regalloc-fast-debug-spill.ll | 250 ++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 9e4e26f1392e..cb552f212fbb 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -443,6 +443,9 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, SpilledOperandsMap[MO->getParent()].push_back(MO); for (auto MISpilledOperands : SpilledOperandsMap) { MachineInstr &DBG = *MISpilledOperands.first; + // We don't have enough support for tracking operands of DBG_VALUE_LISTs. + if (DBG.isDebugValueList()) + continue; MachineInstr *NewDV = buildDbgValueForSpill( *MBB, Before, *MISpilledOperands.first, FI, MISpilledOperands.second); assert(NewDV->getParent() == MBB && "dangling parent pointer"); diff --git a/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll b/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll new file mode 100644 index 000000000000..cae3cb3bbdf8 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll @@ -0,0 +1,250 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 < %s | FileCheck %s + +; This test would previously crash in RegisterScavenging, or with assertions +; enabled it would fail when RegAllocFast calls clearVirtRegs. This was due to +; unhandled virt regs in cloned DBG_VALUE_LIST for spills, which are now skipped. +; https://github.com/llvm/llvm-project/issues/59172 + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +; Function Attrs: argmemonly nocallback nofree nounwind willreturn +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +define void @read_to_end(i1 %0) personality ptr null { +; CHECK-LABEL: read_to_end: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -80(1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: mfocrf 3, 128 +; CHECK-NEXT: rlwinm 3, 3, 1, 0, 0 +; CHECK-NEXT: stw 3, 60(1) +; CHECK-NEXT: ld 3, 0(0) +; CHECK-NEXT: std 3, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: std 3, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: #DEBUG_VALUE: spec_extend:iterator <- [DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_constu 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value, DW_OP_LLVM_fragment 64 64] undef, $x3 +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: crxor 20, 1, 20 +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: addi 1, 1, 80 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: ld 5, 72(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 4, 64(1) # 8-byte Folded Reload +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: bl memcpy +; CHECK-NEXT: nop +; CHECK-NEXT: lwz 4, 60(1) +; CHECK-NEXT: # implicit-def: $cr5lt +; CHECK-NEXT: mfocrf 3, 4 +; CHECK-NEXT: rlwimi 3, 4, 12, 20, 20 +; CHECK-NEXT: mtocrf 4, 3 +; CHECK-NEXT: bc 12, 20, .LBB0_4 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: addi 1, 1, 80 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %2 = load ptr, ptr null, align 8 + %3 = sub i64 0, 0 + call void @llvm.dbg.value(metadata !DIArgList(ptr %2, i64 %3), metadata !129, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value, DW_OP_LLVM_fragment, 64, 64)), !dbg !140 + br i1 %0, label %4, label %5 + +4: ; preds = %1 + ret void + +5: ; preds = %1 + tail call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %2, i64 %3, i1 false) + br i1 %0, label %7, label %6 + +6: ; preds = %5 + br label %7 + +7: ; preds = %6, %5 + ret void +} + +attributes #0 = { argmemonly nocallback nofree nounwind willreturn } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !2, producer: "clang LLVM (rustc version 1.67.0-dev)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "library/std/src/lib.rs/@/std.ff910444-cgu.11", directory: "/home/jistone/rust") +!3 = !{} +!4 = !{!5, !12, !18, !23, !28, !34, !43, !49, !52, !58, !64, !68, !77, !81, !86, !90, !98, !102, !106, !111, !117, !124} +!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) +!6 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !8, isLocal: true, isDefinition: true) +!7 = !DIFile(filename: "", directory: "") +!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !9, templateParams: !3, identifier: "1ec913b2a90798f33a12cdc627a17d3d") +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "String", scope: !10, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "b616ccc9e18737e903266aae12eea82") +!10 = !DINamespace(name: "string", scope: !11) +!11 = !DINamespace(name: "alloc", scope: null) +!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression()) +!13 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !14, isLocal: true, isDefinition: true) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !15, templateParams: !3, identifier: "7f09904511177108b2e94c43effbe403") +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BorrowMutError", scope: !16, file: !7, align: 8, elements: !3, identifier: "acf9edd4524e0ff9a9398905d3ba31a6") +!16 = !DINamespace(name: "cell", scope: !17) +!17 = !DINamespace(name: "core", scope: null) +!18 = !DIGlobalVariableExpression(var: !19, expr: !DIExpression()) +!19 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !20, isLocal: true, isDefinition: true) +!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !21, templateParams: !3, identifier: "84cb7e6d80fc4c532d8f45aaa75a7ae3") +!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Error", scope: !22, file: !7, align: 8, elements: !3, identifier: "abcb9fb1fe4fda8598a8687b517935b") +!22 = !DINamespace(name: "fmt", scope: !17) +!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression()) +!24 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !25, isLocal: true, isDefinition: true) +!25 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !26, templateParams: !3, identifier: "bafa31943f8233dbf8d2de6a615f899") +!26 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TryFromSliceError", scope: !27, file: !7, align: 8, elements: !3, templateParams: !3, identifier: "2dd7cf8d77337f63be7c7f5feb370b37") +!27 = !DINamespace(name: "array", scope: !17) +!28 = !DIGlobalVariableExpression(var: !29, expr: !DIExpression()) +!29 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !30, isLocal: true, isDefinition: true) +!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !31, templateParams: !3, identifier: "3292395ea0f5a7e3e88f36db52eb440c") +!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TryFromIntError", scope: !32, file: !7, align: 8, elements: !3, templateParams: !3, identifier: "5b131d57d001578fbf4fb83f2028eb12") +!32 = !DINamespace(name: "error", scope: !33) +!33 = !DINamespace(name: "num", scope: !17) +!34 = !DIGlobalVariableExpression(var: !35, expr: !DIExpression()) +!35 = distinct !DIGlobalVariable(name: "OUTPUT_CAPTURE_USED", linkageName: "_ZN3std2io5stdio19OUTPUT_CAPTURE_USED17h6bb564f9f9e20f1bE", scope: !36, file: !39, line: 38, type: !40, isLocal: true, isDefinition: true, align: 8) +!36 = !DINamespace(name: "stdio", scope: !37) +!37 = !DINamespace(name: "io", scope: !38) +!38 = !DINamespace(name: "std", scope: null) +!39 = !DIFile(filename: "library/std/src/io/stdio.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "6e6a519ce8370e29f07d850a34a413c1") +!40 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AtomicBool", scope: !41, file: !7, size: 8, align: 8, elements: !3, templateParams: !3, identifier: "c1cddf0305d4e6a98a8ddd4b6fdb5b91") +!41 = !DINamespace(name: "atomic", scope: !42) +!42 = !DINamespace(name: "sync", scope: !17) +!43 = !DIGlobalVariableExpression(var: !44, expr: !DIExpression()) +!44 = distinct !DIGlobalVariable(name: "INSTANCE", linkageName: "_ZN3std2io5stdio5stdin8INSTANCE17h225ddf7c6608f4aaE", scope: !45, file: !39, line: 320, type: !46, isLocal: true, isDefinition: true, align: 64) +!45 = !DINamespace(name: "stdin", scope: !36) +!46 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OnceLock>>", scope: !47, file: !7, size: 448, align: 64, elements: !3, templateParams: !3, identifier: "2236c08b0846b8b2f33c235183822718") +!47 = !DINamespace(name: "once_lock", scope: !48) +!48 = !DINamespace(name: "sync", scope: !38) +!49 = !DIGlobalVariableExpression(var: !50, expr: !DIExpression()) +!50 = distinct !DIGlobalVariable(name: "STDOUT", linkageName: "_ZN3std2io5stdio6STDOUT17hd8472b9eb112f94aE", scope: !36, file: !39, line: 554, type: !51, isLocal: true, isDefinition: true, align: 64) +!51 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OnceLock>>>", scope: !47, file: !7, size: 512, align: 64, elements: !3, templateParams: !3, identifier: "31535135376bcf9dc80438cb0beaa95") +!52 = !DIGlobalVariableExpression(var: !53, expr: !DIExpression()) +!53 = distinct !DIGlobalVariable(name: "INSTANCE", linkageName: "_ZN3std2io5stdio6stderr8INSTANCE17he81b75fda1609dccE", scope: !54, file: !39, line: 844, type: !55, isLocal: true, isDefinition: true, align: 64) +!54 = !DINamespace(name: "stderr", scope: !36) +!55 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ReentrantMutex>", scope: !56, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "5c48123cbde8afbfd832b70e8bb014b") +!56 = !DINamespace(name: "remutex", scope: !57) +!57 = !DINamespace(name: "sys_common", scope: !38) +!58 = !DIGlobalVariableExpression(var: !59, expr: !DIExpression()) +!59 = distinct !DIGlobalVariable(name: " as core::fmt::Write>::{vtable}", scope: null, file: !7, type: !60, isLocal: true, isDefinition: true) +!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: " as core::fmt::Write>::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !61, templateParams: !3, identifier: "43681bec3eba7b0defb75fd847230ef3") +!61 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Adapter", scope: !62, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "eda3cd8f60f00feb7019a3d90d2413dd") +!62 = !DINamespace(name: "write_fmt", scope: !63) +!63 = !DINamespace(name: "Write", scope: !37) +!64 = !DIGlobalVariableExpression(var: !65, expr: !DIExpression()) +!65 = distinct !DIGlobalVariable(name: " as core::fmt::Write>::{vtable}", scope: null, file: !7, type: !66, isLocal: true, isDefinition: true) +!66 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: " as core::fmt::Write>::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !67, templateParams: !3, identifier: "2235adf22355a080446df25ada963d8f") +!67 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Adapter", scope: !62, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "50f418463bae1beffe989359452b170a") +!68 = !DIGlobalVariableExpression(var: !69, expr: !DIExpression()) +!69 = distinct !DIGlobalVariable(name: "__KEY", linkageName: "_ZN3std2io5stdio14OUTPUT_CAPTURE7__getit5__KEY17h82ea5b0c4e81236dE", scope: !70, file: !72, line: 331, type: !73, isLocal: true, isDefinition: true, align: 64) +!70 = !DINamespace(name: "__getit", scope: !71) +!71 = !DINamespace(name: "OUTPUT_CAPTURE", scope: !36) +!72 = !DIFile(filename: "library/std/src/thread/local.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "e3766fd5751a888dc2040f63031e944e") +!73 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Key>>>>>", scope: !74, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "4d1514f685cbd010d7b86d2eef080b6c") +!74 = !DINamespace(name: "fast", scope: !75) +!75 = !DINamespace(name: "local", scope: !76) +!76 = !DINamespace(name: "thread", scope: !38) +!77 = !DIGlobalVariableExpression(var: !78, expr: !DIExpression()) +!78 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !79, isLocal: true, isDefinition: true) +!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !80, templateParams: !3, identifier: "1a7c0806435616633a284f48de1194c5") +!80 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed) +!81 = !DIGlobalVariableExpression(var: !82, expr: !DIExpression()) +!82 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !83, isLocal: true, isDefinition: true) +!83 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !84, templateParams: !3, identifier: "8e82e7dafb168f2995a6711175975a8") +!84 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PathBuf", scope: !85, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "85bd755ad2187534379df2cc01ef53a0") +!85 = !DINamespace(name: "path", scope: !38) +!86 = !DIGlobalVariableExpression(var: !87, expr: !DIExpression()) +!87 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !88, isLocal: true, isDefinition: true) +!88 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !89, templateParams: !3, identifier: "a8f7c32dd1df279746df60c6d46ce35e") +!89 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean) +!90 = !DIGlobalVariableExpression(var: !91, expr: !DIExpression()) +!91 = distinct !DIGlobalVariable(name: "STATX_STATE", linkageName: "_ZN3std3sys4unix2fs9try_statx11STATX_STATE17h465ade0d62262837E", scope: !92, file: !96, line: 157, type: !97, isLocal: true, isDefinition: true, align: 8) +!92 = !DINamespace(name: "try_statx", scope: !93) +!93 = !DINamespace(name: "fs", scope: !94) +!94 = !DINamespace(name: "unix", scope: !95) +!95 = !DINamespace(name: "sys", scope: !38) +!96 = !DIFile(filename: "library/std/src/sys/unix/fs.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "594559328c68ee77afe955cd571273ee") +!97 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AtomicU8", scope: !41, file: !7, size: 8, align: 8, elements: !3, templateParams: !3, identifier: "dda3b691bea8e1b5292414dd97926af2") +!98 = !DIGlobalVariableExpression(var: !99, expr: !DIExpression()) +!99 = distinct !DIGlobalVariable(name: "<&bool as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !100, isLocal: true, isDefinition: true) +!100 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&bool as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !101, templateParams: !3, identifier: "5e8d2c48c9cc79c318e2bd28b03e141a") +!101 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&bool", baseType: !89, size: 64, align: 64, dwarfAddressSpace: 0) +!102 = !DIGlobalVariableExpression(var: !103, expr: !DIExpression()) +!103 = distinct !DIGlobalVariable(name: "<&i32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !104, isLocal: true, isDefinition: true) +!104 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&i32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !105, templateParams: !3, identifier: "d4029746615b6a868ffbc67515d99878") +!105 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&i32", baseType: !80, size: 64, align: 64, dwarfAddressSpace: 0) +!106 = !DIGlobalVariableExpression(var: !107, expr: !DIExpression()) +!107 = distinct !DIGlobalVariable(name: "<&u32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !108, isLocal: true, isDefinition: true) +!108 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&u32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !109, templateParams: !3, identifier: "178e0e76b9d9178d686381b2d05a7777") +!109 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&u32", baseType: !110, size: 64, align: 64, dwarfAddressSpace: 0) +!110 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned) +!111 = !DIGlobalVariableExpression(var: !112, expr: !DIExpression()) +!112 = distinct !DIGlobalVariable(name: "<&core::option::Option as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !113, isLocal: true, isDefinition: true) +!113 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&core::option::Option as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !114, templateParams: !3, identifier: "7ca8386b4d420d719587fa3255329a7a") +!114 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&core::option::Option", baseType: !115, size: 64, align: 64, dwarfAddressSpace: 0) +!115 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option", scope: !116, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "ad8474e495013fa1e3af4a6b53a05f4b") +!116 = !DINamespace(name: "option", scope: !17) +!117 = !DIGlobalVariableExpression(var: !118, expr: !DIExpression()) +!118 = distinct !DIGlobalVariable(name: "HAS_CLONE3", linkageName: "_ZN3std3sys4unix7process13process_inner66_$LT$impl$u20$std..sys..unix..process..process_common..Command$GT$7do_fork10HAS_CLONE317h7d23eb353ae1c9a8E", scope: !119, file: !123, line: 148, type: !40, isLocal: true, isDefinition: true, align: 8) +!119 = !DINamespace(name: "do_fork", scope: !120) +!120 = !DINamespace(name: "{impl#0}", scope: !121) +!121 = !DINamespace(name: "process_inner", scope: !122) +!122 = !DINamespace(name: "process", scope: !94) +!123 = !DIFile(filename: "library/std/src/sys/unix/process/process_unix.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "91761d638041a5dd66c0a64d968debe6") +!124 = !DIGlobalVariableExpression(var: !125, expr: !DIExpression()) +!125 = distinct !DIGlobalVariable(name: "::{vtable}", scope: null, file: !7, type: !126, isLocal: true, isDefinition: true) +!126 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !127, templateParams: !3, identifier: "13903f30d26ee5869ef7a3fc63a2e03d") +!127 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NonZeroI32", scope: !128, file: !7, size: 32, align: 32, elements: !3, templateParams: !3, identifier: "e292f11a32f1ce5cf3b26864e4a0f5e5") +!128 = !DINamespace(name: "nonzero", scope: !33) +!129 = !DILocalVariable(name: "iterator", arg: 2, scope: !130, file: !131, line: 83, type: !137) +!130 = distinct !DISubprogram(name: "spec_extend", linkageName: "_ZN132_$LT$alloc..vec..Vec$LT$T$C$A$GT$$u20$as$u20$alloc..vec..spec_extend..SpecExtend$LT$$RF$T$C$core..slice..iter..Iter$LT$T$GT$$GT$$GT$11spec_extend17hb56b69f474ec1e6dE", scope: !132, file: !131, line: 83, type: !135, scopeLine: 83, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3) +!131 = !DIFile(filename: "library/alloc/src/vec/spec_extend.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "0614d5dabe9e343254af1b3fa1ec7315") +!132 = !DINamespace(name: "{impl#4}", scope: !133) +!133 = !DINamespace(name: "spec_extend", scope: !134) +!134 = !DINamespace(name: "vec", scope: !11) +!135 = distinct !DISubroutineType(types: !136) +!136 = !{null} +!137 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Iter", scope: !138, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "c31ab6f02ccece1f1a6e93425acabaa1") +!138 = !DINamespace(name: "iter", scope: !139) +!139 = !DINamespace(name: "slice", scope: !17) +!140 = !DILocation(line: 0, scope: !130, inlinedAt: !141) +!141 = distinct !DILocation(line: 2392, column: 9, scope: !142, inlinedAt: !146) +!142 = distinct !DISubprogram(name: "extend_from_slice", linkageName: "_ZN5alloc3vec16Vec$LT$T$C$A$GT$17extend_from_slice17hbc8d29f2694fd768E", scope: !144, file: !143, line: 2391, type: !145, scopeLine: 2391, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3) +!143 = !DIFile(filename: "library/alloc/src/vec/mod.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "0d69d0c0c11b3e47364cf6be0d07c829") +!144 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec", scope: !134, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "f970dea4d30c1daf847db520fef9390d") +!145 = distinct !DISubroutineType(types: !136) +!146 = distinct !DILocation(line: 330, column: 9, scope: !147, inlinedAt: !154) +!147 = distinct !DILexicalBlock(scope: !149, file: !148, line: 329, column: 9) +!148 = !DIFile(filename: "library/std/src/io/buffered/bufreader.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "5375e06de487f85ee2f6d21c8a84ce7d") +!149 = distinct !DISubprogram(name: "read_to_end", linkageName: "_ZN82_$LT$std..io..buffered..bufreader..BufReader$LT$R$GT$$u20$as$u20$std..io..Read$GT$11read_to_end17h9f09720ee76e6db9E", scope: !150, file: !148, line: 328, type: !153, scopeLine: 328, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3) +!150 = !DINamespace(name: "{impl#3}", scope: !151) +!151 = !DINamespace(name: "bufreader", scope: !152) +!152 = !DINamespace(name: "buffered", scope: !37) +!153 = distinct !DISubroutineType(types: !3) +!154 = distinct !DILocation(line: 464, column: 9, scope: !155, inlinedAt: !158) +!155 = distinct !DISubprogram(name: "read_to_end", linkageName: "_ZN59_$LT$std..io..stdio..StdinLock$u20$as$u20$std..io..Read$GT$11read_to_end17h38999a681cc6c5b5E", scope: !156, file: !39, line: 463, type: !157, scopeLine: 463, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3) +!156 = !DINamespace(name: "{impl#7}", scope: !36) +!157 = distinct !DISubroutineType(types: !3) +!158 = distinct !DILocation(line: 430, column: 9, scope: !159) +!159 = distinct !DISubprogram(name: "read_to_end", linkageName: "_ZN55_$LT$std..io..stdio..Stdin$u20$as$u20$std..io..Read$GT$11read_to_end17haba70a09681d41d3E", scope: !160, file: !39, line: 429, type: !161, scopeLine: 429, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3) +!160 = !DINamespace(name: "{impl#5}", scope: !36) +!161 = !DISubroutineType(types: !3) -- Gitee From 939f5a33711370697f9ad1de4267cfc7399dfe86 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 8 Jan 2023 00:32:42 +0100 Subject: [PATCH 38/41] libc++: bring back the unsigned in the return type in wcstoull_l got remove here: https://github.com/llvm/llvm-project/commit/67b0b02ec9f2bbc57bf8f0550828d97f460ac11f#diff-e41832b8aa26da45585a57c5111531f2e1d07e91a67c4f8bf1cd6d566ae45a2bR42 Differential Revision: https://reviews.llvm.org/D141208 (cherry picked from commit fc87452916c0d8759625aad65e9335778ce9cc68) --- libcxx/include/__support/musl/xlocale.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/include/__support/musl/xlocale.h b/libcxx/include/__support/musl/xlocale.h index f564c87885ac..e674a41fa622 100644 --- a/libcxx/include/__support/musl/xlocale.h +++ b/libcxx/include/__support/musl/xlocale.h @@ -39,7 +39,7 @@ wcstoll_l(const wchar_t *__nptr, wchar_t **__endptr, int __base, locale_t) { return ::wcstoll(__nptr, __endptr, __base); } -inline _LIBCPP_HIDE_FROM_ABI long long +inline _LIBCPP_HIDE_FROM_ABI unsigned long long wcstoull_l(const wchar_t *__nptr, wchar_t **__endptr, int __base, locale_t) { return ::wcstoull(__nptr, __endptr, __base); } -- Gitee From 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 17 Nov 2022 21:34:29 +0100 Subject: [PATCH 39/41] [libc++] Fix memory leaks when throwing inside std::vector constructors Fixes #58392 Reviewed By: ldionne, #libc Spies: alexfh, hans, joanahalili, dblaikie, libcxx-commits Differential Revision: https://reviews.llvm.org/D138601 --- libcxx/include/vector | 119 ++++++--- .../vector.bool/ctor_exceptions.pass.cpp | 141 +++++++++++ .../vector/vector.cons/exceptions.pass.cpp | 229 ++++++++++++++++++ 3 files changed, 453 insertions(+), 36 deletions(-) create mode 100644 libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp create mode 100644 libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp diff --git a/libcxx/include/vector b/libcxx/include/vector index 252a0f051ff5..63759407ce94 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -297,6 +297,7 @@ erase_if(vector& c, Predicate pred); // C++20 #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/swap.h> +#include <__utility/transaction.h> #include #include #include @@ -425,18 +426,27 @@ public: value_type, typename iterator_traits<_ForwardIterator>::reference>::value>::type* = 0); - _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY - ~vector() - { - __annotate_delete(); - std::__debug_db_erase_c(this); +private: + class __destroy_vector { + public: + _LIBCPP_CONSTEXPR __destroy_vector(vector& __vec) : __vec_(__vec) {} - if (this->__begin_ != nullptr) - { - __clear(); - __alloc_traits::deallocate(__alloc(), this->__begin_, capacity()); + _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI void operator()() { + __vec_.__annotate_delete(); + std::__debug_db_erase_c(std::addressof(__vec_)); + + if (__vec_.__begin_ != nullptr) { + __vec_.__clear(); + __alloc_traits::deallocate(__vec_.__alloc(), __vec_.__begin_, __vec_.capacity()); + } } - } + + private: + vector& __vec_; + }; + +public: + _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI ~vector() { __destroy_vector(*this)(); } _LIBCPP_CONSTEXPR_AFTER_CXX17 vector(const vector& __x); _LIBCPP_CONSTEXPR_AFTER_CXX17 vector(const vector& __x, const __type_identity_t& __a); @@ -1075,12 +1085,14 @@ template _LIBCPP_CONSTEXPR_AFTER_CXX17 vector<_Tp, _Allocator>::vector(size_type __n) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); if (__n > 0) { __vallocate(__n); __construct_at_end(__n); } + __guard.__complete(); } #if _LIBCPP_STD_VER > 11 @@ -1089,12 +1101,14 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a) : __end_cap_(nullptr, __a) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); if (__n > 0) { __vallocate(__n); __construct_at_end(__n); } + __guard.__complete(); } #endif @@ -1102,12 +1116,14 @@ template _LIBCPP_CONSTEXPR_AFTER_CXX17 vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); if (__n > 0) { __vallocate(__n); __construct_at_end(__n, __x); } + __guard.__complete(); } template @@ -1120,9 +1136,11 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, typename iterator_traits<_InputIterator>::reference>::value, _InputIterator>::type __last) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); for (; __first != __last; ++__first) emplace_back(*__first); + __guard.__complete(); } template @@ -1135,9 +1153,11 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, c typename iterator_traits<_InputIterator>::reference>::value>::type*) : __end_cap_(nullptr, __a) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); for (; __first != __last; ++__first) emplace_back(*__first); + __guard.__complete(); } template @@ -1150,13 +1170,15 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, typename iterator_traits<_ForwardIterator>::reference>::value, _ForwardIterator>::type __last) { - _VSTD::__debug_db_insert_c(this); - size_type __n = static_cast(_VSTD::distance(__first, __last)); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); + size_type __n = static_cast(std::distance(__first, __last)); if (__n > 0) { __vallocate(__n); __construct_at_end(__first, __last, __n); } + __guard.__complete(); } template @@ -1169,13 +1191,15 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __las typename iterator_traits<_ForwardIterator>::reference>::value>::type*) : __end_cap_(nullptr, __a) { - _VSTD::__debug_db_insert_c(this); - size_type __n = static_cast(_VSTD::distance(__first, __last)); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); + size_type __n = static_cast(std::distance(__first, __last)); if (__n > 0) { __vallocate(__n); __construct_at_end(__first, __last, __n); } + __guard.__complete(); } template @@ -1183,13 +1207,15 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 vector<_Tp, _Allocator>::vector(const vector& __x) : __end_cap_(nullptr, __alloc_traits::select_on_container_copy_construction(__x.__alloc())) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); size_type __n = __x.size(); if (__n > 0) { __vallocate(__n); __construct_at_end(__x.__begin_, __x.__end_, __n); } + __guard.__complete(); } template @@ -1197,13 +1223,15 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 vector<_Tp, _Allocator>::vector(const vector& __x, const __type_identity_t& __a) : __end_cap_(nullptr, __a) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); size_type __n = __x.size(); if (__n > 0) { __vallocate(__n); __construct_at_end(__x.__begin_, __x.__end_, __n); } + __guard.__complete(); } template @@ -1243,7 +1271,9 @@ vector<_Tp, _Allocator>::vector(vector&& __x, const __type_identity_t _Ip; + auto __guard = std::__make_transaction(__destroy_vector(*this)); assign(_Ip(__x.begin()), _Ip(__x.end())); + __guard.__complete(); } } @@ -1254,12 +1284,14 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 inline _LIBCPP_INLINE_VISIBILITY vector<_Tp, _Allocator>::vector(initializer_list __il) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); if (__il.size() > 0) { __vallocate(__il.size()); __construct_at_end(__il.begin(), __il.end(), __il.size()); } + __guard.__complete(); } template @@ -1268,12 +1300,14 @@ inline _LIBCPP_INLINE_VISIBILITY vector<_Tp, _Allocator>::vector(initializer_list __il, const allocator_type& __a) : __end_cap_(nullptr, __a) { - _VSTD::__debug_db_insert_c(this); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + std::__debug_db_insert_c(this); if (__il.size() > 0) { __vallocate(__il.size()); __construct_at_end(__il.begin(), __il.end(), __il.size()); } + __guard.__complete(); } #endif // _LIBCPP_CXX03_LANG @@ -2111,8 +2145,26 @@ public: #else _NOEXCEPT; #endif - _LIBCPP_CONSTEXPR_AFTER_CXX17 ~vector(); - _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n); + +private: + class __destroy_vector { + public: + _LIBCPP_CONSTEXPR __destroy_vector(vector& __vec) : __vec_(__vec) {} + + _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI void operator()() { + if (__vec_.__begin_ != nullptr) + __storage_traits::deallocate(__vec_.__alloc(), __vec_.__begin_, __vec_.__cap()); + std::__debug_db_invalidate_all(this); + } + + private: + vector& __vec_; + }; + +public: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 ~vector() { __destroy_vector(*this)(); } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n); #if _LIBCPP_STD_VER > 11 _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n, const allocator_type& __a); #endif @@ -2647,12 +2699,14 @@ vector::vector(_ForwardIterator __first, _ForwardIterator __la __size_(0), __cap_alloc_(0, __default_init_tag()) { - size_type __n = static_cast(_VSTD::distance(__first, __last)); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + size_type __n = static_cast(std::distance(__first, __last)); if (__n > 0) { __vallocate(__n); __construct_at_end(__first, __last); } + __guard.__complete(); } template @@ -2664,12 +2718,14 @@ vector::vector(_ForwardIterator __first, _ForwardIterator __la __size_(0), __cap_alloc_(0, static_cast<__storage_allocator>(__a)) { - size_type __n = static_cast(_VSTD::distance(__first, __last)); + auto __guard = std::__make_transaction(__destroy_vector(*this)); + size_type __n = static_cast(std::distance(__first, __last)); if (__n > 0) { __vallocate(__n); __construct_at_end(__first, __last); } + __guard.__complete(); } #ifndef _LIBCPP_CXX03_LANG @@ -2706,15 +2762,6 @@ vector::vector(initializer_list __il, const alloca #endif // _LIBCPP_CXX03_LANG -template -_LIBCPP_CONSTEXPR_AFTER_CXX17 -vector::~vector() -{ - if (__begin_ != nullptr) - __storage_traits::deallocate(__alloc(), __begin_, __cap()); - std::__debug_db_invalidate_all(this); -} - template _LIBCPP_CONSTEXPR_AFTER_CXX17 vector::vector(const vector& __v) diff --git a/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp new file mode 100644 index 000000000000..592d733de42d --- /dev/null +++ b/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp @@ -0,0 +1,141 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: no-exceptions + +// (bug report: https://llvm.org/PR58392) +// Check that vector constructors don't leak memory when an operation inside the constructor throws an exception + +#include +#include + +#include "count_new.h" +#include "test_iterators.h" + +template +struct Allocator { + using value_type = T; + using is_always_equal = std::false_type; + + template + Allocator(const Allocator&) {} + + Allocator(bool should_throw = true) { + if (should_throw) + throw 0; + } + + T* allocate(int n) { return std::allocator().allocate(n); } + void deallocate(T* ptr, int n) { std::allocator().deallocate(ptr, n); } + + friend bool operator==(const Allocator&, const Allocator&) { return false; } +}; + +template +struct Iterator { + using iterator_category = IterCat; + using difference_type = std::ptrdiff_t; + using value_type = bool; + using reference = bool&; + using pointer = bool*; + + int i_; + bool b_ = true; + Iterator(int i = 0) : i_(i) {} + bool& operator*() { + if (i_ == 1) + throw 1; + return b_; + } + + friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ == rhs.i_; } + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ != rhs.i_; } + + Iterator& operator++() { + ++i_; + return *this; + } + + Iterator operator++(int) { + auto tmp = *this; + ++i_; + return tmp; + } +}; + +void check_new_delete_called() { + assert(globalMemCounter.new_called == globalMemCounter.delete_called); + assert(globalMemCounter.new_array_called == globalMemCounter.delete_array_called); + assert(globalMemCounter.aligned_new_called == globalMemCounter.aligned_delete_called); + assert(globalMemCounter.aligned_new_array_called == globalMemCounter.aligned_delete_array_called); +} + +int main(int, char**) { + using AllocVec = std::vector >; + +#if TEST_STD_VER >= 14 + try { // Throw in vector(size_type, const allocator_type&) from allocator + Allocator alloc(false); + AllocVec get_alloc(0, alloc); + } catch (int) { + } + check_new_delete_called(); +#endif // TEST_STD_VER >= 14 + + try { // Throw in vector(InputIterator, InputIterator) from input iterator + std::vector vec((Iterator()), Iterator(2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator) from forward iterator + std::vector vec((Iterator()), Iterator(2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator) from allocator + int a[] = {1, 2}; + AllocVec vec(cpp17_input_iterator(a), cpp17_input_iterator(a + 2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from input iterator + std::allocator alloc; + std::vector vec(Iterator(), Iterator(2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from forward iterator + std::allocator alloc; + std::vector vec(Iterator(), Iterator(2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator + bool a[] = {true, false}; + Allocator alloc(false); + AllocVec vec(cpp17_input_iterator(a), cpp17_input_iterator(a + 2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator + bool a[] = {true, false}; + Allocator alloc(false); + AllocVec vec(forward_iterator(a), forward_iterator(a + 2), alloc); + } catch (int) { + } + check_new_delete_called(); + + return 0; +} diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp new file mode 100644 index 000000000000..26ad7b4fd05a --- /dev/null +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp @@ -0,0 +1,229 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: no-exceptions + +// (bug report: https://llvm.org/PR58392) +// Check that vector constructors don't leak memory when an operation inside the constructor throws an exception + +#include +#include + +#include "count_new.h" +#include "test_iterators.h" + +template +struct Allocator { + using value_type = T; + using is_always_equal = std::false_type; + + Allocator(bool should_throw = true) { + if (should_throw) + throw 0; + } + + T* allocate(int n) { return std::allocator().allocate(n); } + void deallocate(T* ptr, int n) { std::allocator().deallocate(ptr, n); } + + friend bool operator==(const Allocator&, const Allocator&) { return false; } +}; + +struct ThrowingT { + int* throw_after_n_ = nullptr; + ThrowingT() { throw 0; } + + ThrowingT(int& throw_after_n) : throw_after_n_(&throw_after_n) { + if (throw_after_n == 0) + throw 0; + --throw_after_n; + } + + ThrowingT(const ThrowingT&) { + if (throw_after_n_ == nullptr || *throw_after_n_ == 0) + throw 1; + --*throw_after_n_; + } + + ThrowingT& operator=(const ThrowingT&) { + if (throw_after_n_ == nullptr || *throw_after_n_ == 0) + throw 1; + --*throw_after_n_; + return *this; + } +}; + +template +struct Iterator { + using iterator_category = IterCat; + using difference_type = std::ptrdiff_t; + using value_type = int; + using reference = int&; + using pointer = int*; + + int i_; + Iterator(int i = 0) : i_(i) {} + int& operator*() { + if (i_ == 1) + throw 1; + return i_; + } + + friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ == rhs.i_; } + + friend bool operator!=(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ != rhs.i_; } + + Iterator& operator++() { + ++i_; + return *this; + } + + Iterator operator++(int) { + auto tmp = *this; + ++i_; + return tmp; + } +}; + +void check_new_delete_called() { + assert(globalMemCounter.new_called == globalMemCounter.delete_called); + assert(globalMemCounter.new_array_called == globalMemCounter.delete_array_called); + assert(globalMemCounter.aligned_new_called == globalMemCounter.aligned_delete_called); + assert(globalMemCounter.aligned_new_array_called == globalMemCounter.aligned_delete_array_called); +} + +int main(int, char**) { + using AllocVec = std::vector >; + try { // vector() + AllocVec vec; + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(size_type) from type + std::vector get_alloc(1); + } catch (int) { + } + check_new_delete_called(); + +#if TEST_STD_VER >= 14 + try { // Throw in vector(size_type, value_type) from type + int throw_after = 1; + ThrowingT v(throw_after); + std::vector get_alloc(1, v); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(size_type, const allocator_type&) from allocator + Allocator alloc(false); + AllocVec get_alloc(0, alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(size_type, const allocator_type&) from the type + std::vector vec(1, std::allocator()); + } catch (int) { + } + check_new_delete_called(); +#endif // TEST_STD_VER >= 14 + + try { // Throw in vector(InputIterator, InputIterator) from input iterator + std::vector vec((Iterator()), Iterator(2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator) from forward iterator + std::vector vec((Iterator()), Iterator(2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator) from allocator + int a[] = {1, 2}; + AllocVec vec(cpp17_input_iterator(a), cpp17_input_iterator(a + 2)); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from input iterator + std::allocator alloc; + std::vector vec(Iterator(), Iterator(2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from forward iterator + std::allocator alloc; + std::vector vec(Iterator(), Iterator(2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator + int a[] = {1, 2}; + Allocator alloc(false); + AllocVec vec(cpp17_input_iterator(a), cpp17_input_iterator(a + 2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator + int a[] = {1, 2}; + Allocator alloc(false); + AllocVec vec(forward_iterator(a), forward_iterator(a + 2), alloc); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(const vector&) from type + std::vector vec; + int throw_after = 0; + vec.emplace_back(throw_after); + auto vec2 = vec; + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(const vector&, const allocator_type&) from type + std::vector vec; + int throw_after = 1; + vec.emplace_back(throw_after); + std::vector vec2(vec, std::allocator()); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(vector&&, const allocator_type&) from type + std::vector > vec(Allocator(false)); + int throw_after = 1; + vec.emplace_back(throw_after); + std::vector > vec2(std::move(vec), Allocator(false)); + } catch (int) { + } + check_new_delete_called(); + +#if TEST_STD_VER >= 11 + try { // Throw in vector(initializer_list) from type + int throw_after = 1; + std::vector vec({ThrowingT(throw_after)}); + } catch (int) { + } + check_new_delete_called(); + + try { // Throw in vector(initializer_list, const allocator_type&) constructor from type + int throw_after = 1; + std::vector vec({ThrowingT(throw_after)}, std::allocator()); + } catch (int) { + } + check_new_delete_called(); +#endif // TEST_STD_VER >= 11 + + return 0; +} -- Gitee From cbaba90c719d4363aead90c2552cb5fdbe39c7c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=BE=9B=E8=BF=8E=E6=9E=97?= Date: Fri, 4 Nov 2022 10:18:37 +0000 Subject: [PATCH 40/41] add build.sh. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 辛迎林 --- README_OE.md | 21 +++ build.sh | 164 ++++++++++++++++++ .../lsan/TestCases/Linux/log-path_test.cpp | 1 + compiler-rt/test/lsan/TestCases/malloc_zero.c | 1 + .../test/lsan/TestCases/realloc_too_big.c | 1 + .../archer/tests/races/critical-unrelated.c | 2 + .../tests/races/lock-nested-unrelated.c | 1 + .../tools/archer/tests/races/lock-unrelated.c | 2 + .../archer/tests/races/parallel-simple.c | 2 + .../archer/tests/races/task-dependency.c | 2 + .../tests/races/task-taskgroup-unrelated.c | 2 + .../archer/tests/races/task-taskwait-nested.c | 2 + openmp/tools/archer/tests/races/task-two.c | 2 + .../archer/tests/task/task_late_fulfill.c | 1 + 14 files changed, 204 insertions(+) create mode 100644 README_OE.md create mode 100755 build.sh diff --git a/README_OE.md b/README_OE.md new file mode 100644 index 000000000000..af956a923c04 --- /dev/null +++ b/README_OE.md @@ -0,0 +1,21 @@ + 我们提供了build.sh脚本来构建llvm-project。 + 我们已经验证了它可以在openEuler系统成功构建。 + + 在构建编译器之前,确保已经安装了以下工具: + gcc \ + g++ \ + libgcc \ + openssl-devel \ + cmake \ + python3 \ + python3-setuptools \ + python-wheel \ + texinfo \ + binutils \ + libstdc++-static \ + libatomic + + 具体的编译选项可以在build.sh中查看,也可以通过“./build.sh -h”命令查看。 + 例如: + ./build.sh -r -b release -X X86 + diff --git a/build.sh b/build.sh new file mode 100755 index 000000000000..4fed7989a657 --- /dev/null +++ b/build.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# Tools to use for bootstrapping. +C_COMPILER_PATH=gcc +CXX_COMPILER_PATH=g++ + +# Initialize our own variables: +buildtype=RelWithDebInfo +backends="ARM;AArch64;X86" +split_dwarf=on +use_ccache="0" +do_install="0" +clean=0 +unit_test="" +verbose="" +dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" +build_dir_name="build" +install_dir_name="install" +build_prefix="$dir/$build_dir_name" +install_prefix="$dir/$install_dir_name" + +# Use 8 threads for builds and tests by default. Use more threads if possible, +# but avoid overloading the system by using up to 50% of available cores. +threads=8 +nproc=$(type -p nproc) +if [ -x "$nproc" -a -f /proc/loadavg ]; then + loadavg=$(awk '{printf "%.0f\n", $1}' < /proc/loadavg) + let threads="($($nproc) - $loadavg) / 2" + if [ $threads -le 0 ]; then + threads=1 + fi +fi + +# Exit script on first error. +set -e + +usage() { + cat <> %t.log // RUN: %adb_shell 'cat %device_rundir/%t.log.*' >> %t.log // RUN: FileCheck %s --check-prefix=CHECK-ERROR < %t.log.* +// REQUIRES: x86_64 #include #include diff --git a/compiler-rt/test/lsan/TestCases/malloc_zero.c b/compiler-rt/test/lsan/TestCases/malloc_zero.c index 5c8d1850a0f9..d6d2db6a62c0 100644 --- a/compiler-rt/test/lsan/TestCases/malloc_zero.c +++ b/compiler-rt/test/lsan/TestCases/malloc_zero.c @@ -3,6 +3,7 @@ /// Fails when only leak sanitizer is enabled // UNSUPPORTED: arm-linux, armhf-linux +// REQUIRES: x86_64 #include #include diff --git a/compiler-rt/test/lsan/TestCases/realloc_too_big.c b/compiler-rt/test/lsan/TestCases/realloc_too_big.c index bb1316024b5c..ad0c0a78e154 100644 --- a/compiler-rt/test/lsan/TestCases/realloc_too_big.c +++ b/compiler-rt/test/lsan/TestCases/realloc_too_big.c @@ -3,6 +3,7 @@ /// Fails when only leak sanitizer is enabled // UNSUPPORTED: arm-linux, armhf-linux +// REQUIRES: x86_64 #include #include diff --git a/openmp/tools/archer/tests/races/critical-unrelated.c b/openmp/tools/archer/tests/races/critical-unrelated.c index bff8b9763c14..1a4d39a72362 100644 --- a/openmp/tools/archer/tests/races/critical-unrelated.c +++ b/openmp/tools/archer/tests/races/critical-unrelated.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include #include diff --git a/openmp/tools/archer/tests/races/lock-nested-unrelated.c b/openmp/tools/archer/tests/races/lock-nested-unrelated.c index e24b4cdedc71..13b6e3417458 100644 --- a/openmp/tools/archer/tests/races/lock-nested-unrelated.c +++ b/openmp/tools/archer/tests/races/lock-nested-unrelated.c @@ -13,6 +13,7 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 #include #include diff --git a/openmp/tools/archer/tests/races/lock-unrelated.c b/openmp/tools/archer/tests/races/lock-unrelated.c index 4245490a703f..891549ff91ff 100644 --- a/openmp/tools/archer/tests/races/lock-unrelated.c +++ b/openmp/tools/archer/tests/races/lock-unrelated.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include #include diff --git a/openmp/tools/archer/tests/races/parallel-simple.c b/openmp/tools/archer/tests/races/parallel-simple.c index 700e4a7f3313..809ef6f4c114 100644 --- a/openmp/tools/archer/tests/races/parallel-simple.c +++ b/openmp/tools/archer/tests/races/parallel-simple.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include #include diff --git a/openmp/tools/archer/tests/races/task-dependency.c b/openmp/tools/archer/tests/races/task-dependency.c index f6496ac8596d..63b4cf4bfc56 100644 --- a/openmp/tools/archer/tests/races/task-dependency.c +++ b/openmp/tools/archer/tests/races/task-dependency.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include "ompt/ompt-signal.h" #include #include diff --git a/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c b/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c index 04b2957b4863..770e31c8f831 100644 --- a/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c +++ b/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include "ompt/ompt-signal.h" #include #include diff --git a/openmp/tools/archer/tests/races/task-taskwait-nested.c b/openmp/tools/archer/tests/races/task-taskwait-nested.c index 02f1fb576c87..64aa03f3f8db 100644 --- a/openmp/tools/archer/tests/races/task-taskwait-nested.c +++ b/openmp/tools/archer/tests/races/task-taskwait-nested.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include "ompt/ompt-signal.h" #include #include diff --git a/openmp/tools/archer/tests/races/task-two.c b/openmp/tools/archer/tests/races/task-two.c index 06d5bc021ee6..f1fa654b2da9 100644 --- a/openmp/tools/archer/tests/races/task-two.c +++ b/openmp/tools/archer/tests/races/task-two.c @@ -13,6 +13,8 @@ // RUN: %libarcher-compile-and-run-race | FileCheck %s // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s // REQUIRES: tsan +// REQUIRES: x86_64 + #include #include #include diff --git a/openmp/tools/archer/tests/task/task_late_fulfill.c b/openmp/tools/archer/tests/task/task_late_fulfill.c index 31d096deec75..23f54eeb9b57 100644 --- a/openmp/tools/archer/tests/task/task_late_fulfill.c +++ b/openmp/tools/archer/tests/task/task_late_fulfill.c @@ -10,6 +10,7 @@ // icc compiler does not support detach clause. // UNSUPPORTED: icc // REQUIRES: tsan +// REQUIRES: x86_64 #include #include -- Gitee From ec80fbeeeee7cee2441bcd58f9ed3898073102f0 Mon Sep 17 00:00:00 2001 From: cf_zhao Date: Wed, 14 Dec 2022 10:32:07 +0800 Subject: [PATCH 41/41] [Build] Add several options to slim the install folder. --- build.sh | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index 4fed7989a657..7b6b7908abbd 100755 --- a/build.sh +++ b/build.sh @@ -7,11 +7,15 @@ CXX_COMPILER_PATH=g++ # Initialize our own variables: buildtype=RelWithDebInfo backends="ARM;AArch64;X86" +enabled_projects="clang;lld;compiler-rt;openmp;clang-tools-extra" +embedded_toolchain="0" split_dwarf=on use_ccache="0" do_install="0" clean=0 unit_test="" +install="install" +install_toolchain_only="0" verbose="" dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" build_dir_name="build" @@ -38,16 +42,19 @@ usage() { cat <