From 086365be7ce0aaeb159fa80d02c426989d5115f3 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 2 Oct 2022 00:47:10 -0700
Subject: [PATCH 01/41] [test] Make Linux/sem_init_glibc.cpp robust

and fix it for 32-bit ports defining sem_init@GLIBC_2.0 (i386, mips32, powerpc32) for glibc>=2.36.

Fix https://github.com/llvm/llvm-project/issues/58079

Reviewed By: mgorny

Differential Revision: https://reviews.llvm.org/D135023

(cherry picked from commit 6f46ff3765dcdc178b9cf52ebd8c03437806798a)
---
 .../sanitizer_common_interceptors.inc         |  2 +-
 .../TestCases/Linux/sem_init_glibc.cpp        | 43 +++++++++----------
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 9af296b1853a..b29665a63390 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -6703,7 +6703,7 @@ INTERCEPTOR(int, sem_init, __sanitizer_sem_t *s, int pshared, unsigned value) {
   COMMON_INTERCEPTOR_ENTER(ctx, sem_init, s, pshared, value);
   // Workaround a bug in glibc's "old" semaphore implementation by
   // zero-initializing the sem_t contents. This has to be done here because
-  // interceptors bind to the lowest symbols version by default, hitting the
+  // interceptors bind to the lowest version before glibc 2.36, hitting the
   // buggy code path while the non-sanitized build of the same code works fine.
   REAL(memset)(s, 0, sizeof(*s));
   int res = REAL(sem_init)(s, pshared, value);
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp
index d623ccabb5b5..234c5019f692 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cpp
@@ -1,39 +1,36 @@
 // RUN: %clangxx -O0 -g %s -lutil -o %t && %run %t
 // This test depends on the glibc layout of struct sem_t and checks that we
 // don't leave sem_t::private uninitialized.
-// UNSUPPORTED: android, lsan-x86, ubsan, target-is-mips64, target-is-mips64el
+// UNSUPPORTED: android, lsan-x86, ubsan
 #include <features.h>
 #include <assert.h>
 #include <semaphore.h>
 #include <string.h>
 #include <stdint.h>
 
-// On powerpc64be semval_t must be 64 bits even with "old" versions of glibc.
-#if __PPC64__ && __BIG_ENDIAN__
-typedef uint64_t semval_t;
-
-// This condition needs to correspond to __HAVE_64B_ATOMICS macro in glibc.
-#elif (defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
-     defined(__s390x__) || defined(__sparc64__) || defined(__alpha__) || \
-     defined(__ia64__) || defined(__m68k__)) && __GLIBC_PREREQ(2, 21)
-typedef uint64_t semval_t;
-#else
+// musl and glibc's __HAVE_64B_ATOMICS==0 ports (e.g. arm, i386) use 32-bit sem
+// values. 64-bit glibc ports defining sem_init@GLIBC_2.0 (mips64) use 32-bit as
+// well, if the sem_init interceptor picks the oldest versioned symbol
+// (glibc<2.36, see https://sourceware.org/PR14932).
+#if !defined(__GLIBC__) || defined(__ILP32__) ||                               \
+    !__GLIBC_PREREQ(2, 36) && defined(__mips64__)
 typedef unsigned semval_t;
+#else
+typedef uint64_t semval_t;
 #endif
 
-// glibc 2.21 has introduced some changes in the way the semaphore value is
-// handled for 32-bit platforms, but since these changes are not ABI-breaking
-// they are not versioned. On newer platforms such as ARM, there is only one
-// version of the symbol, so it's enough to check the glibc version. However,
-// for old platforms such as i386, glibc contains two or even three versions of
-// the sem_init symbol, and the sanitizers always pick the oldest one.
-// Therefore, it is not enough to rely on the __GLIBC_PREREQ macro - we should
-// instead check the platform as well to make sure we only expect the new
-// behavior on platforms where the older symbols do not exist.
-#if defined(__arm__) && __GLIBC_PREREQ(2, 21)
-#define GET_SEM_VALUE(V) ((V) >> 1)
+// glibc __HAVE_64B_ATOMICS==0 ports define a sem_init which shifts the value by
+// 1 (https://sourceware.org/PR12674 glibc 2.21). The version is picked if
+// either glibc>=2.36 or sem_init@GLIBC_2.0 is absent (arm and newer ports).
+//
+// The __GLIBC_PREREQ check is brittle in that it requires matched
+// __GLIBC_PREREQ values for build time and run time.
+#if defined(__GLIBC__) && defined(__ILP32__) &&                                \
+    (__GLIBC_PREREQ(2, 36) || (__GLIBC_PREREQ(2, 21) && !defined(__i386__) &&  \
+                               !defined(__mips__) && !defined(__powerpc__)))
+#  define GET_SEM_VALUE(V) ((V) >> 1)
 #else
-#define GET_SEM_VALUE(V) (V)
+#  define GET_SEM_VALUE(V) (V)
 #endif
 
 void my_sem_init(bool priv, int value, semval_t *a, unsigned char *b) {
-- 
Gitee


From ceee53ce564c3034ea9636b3c416182efadc5967 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Mon, 17 Oct 2022 17:03:35 -0700
Subject: [PATCH 02/41] [SROA] Don't speculate phis with different load user
 types

Fixes an SROA crash.

Fallout from opaque pointers since with typed pointers we'd bail out at the bitcast.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D136119

(cherry picked from commit 6219ec07c6f8d1ead51beca7cf21fbf2323c51d7)
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 19 +++++----
 .../phi-speculate-different-load-types.ll     | 41 +++++++++++++++++++
 2 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 143a035749c7..644c5c82e58e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1210,8 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
   BasicBlock *BB = PN.getParent();
   Align MaxAlign;
   uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
-  APInt MaxSize(APWidth, 0);
-  bool HaveLoad = false;
+  Type *LoadType = nullptr;
   for (User *U : PN.users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
     if (!LI || !LI->isSimple())
@@ -1223,21 +1222,27 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
     if (LI->getParent() != BB)
       return false;
 
+    if (LoadType) {
+      if (LoadType != LI->getType())
+        return false;
+    } else {
+      LoadType = LI->getType();
+    }
+
     // Ensure that there are no instructions between the PHI and the load that
     // could store.
     for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
       if (BBI->mayWriteToMemory())
         return false;
 
-    uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize();
     MaxAlign = std::max(MaxAlign, LI->getAlign());
-    MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
-    HaveLoad = true;
   }
 
-  if (!HaveLoad)
+  if (!LoadType)
     return false;
 
+  APInt LoadSize = APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedSize());
+
   // We can only transform this if it is safe to push the loads into the
   // predecessor blocks. The only thing to watch out for is that we can't put
   // a possibly trapping load in the predecessor if it is a critical edge.
@@ -1259,7 +1264,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
     // If this pointer is always safe to load, or if we can prove that there
     // is already a load in the block, then we can move the load to the pred
     // block.
-    if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
+    if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
       continue;
 
     return false;
diff --git a/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll
new file mode 100644
index 000000000000..7893da340736
--- /dev/null
+++ b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=sroa < %s -S | FileCheck %s
+
+define void @f(i1 %i) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[A1:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[A2:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    br i1 [[I:%.*]], label [[BB1:%.*]], label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi ptr [ [[A1]], [[BB1]] ], [ [[A2]], [[BB]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 4
+; CHECK-NEXT:    call void @use32(i32 [[TMP5]])
+; CHECK-NEXT:    call void @use64(i64 [[TMP4]])
+; CHECK-NEXT:    ret void
+;
+  %a1 = alloca i64
+  %a2 = alloca i64
+  br i1 %i, label %bb1, label %bb
+
+bb:
+  br label %bb2
+
+bb1:
+  br label %bb2
+
+bb2:
+  %tmp3 = phi ptr [ %a1, %bb1 ], [ %a2, %bb ]
+  %tmp5 = load i32, ptr %tmp3
+  %tmp4 = load i64, ptr %tmp3
+  call void @use32(i32 %tmp5)
+  call void @use64(i64 %tmp4)
+  ret void
+}
+
+declare void @use32(i32)
+declare void @use64(i64)
-- 
Gitee


From 455e1d765ad6379c98a499917d16fc8d31da3b2a Mon Sep 17 00:00:00 2001
From: Mike Hommey <mh@glandium.org>
Date: Sun, 23 Oct 2022 09:49:16 +0200
Subject: [PATCH 03/41] [InstCombine] Bail out of casting calls when a
 conversion from/to byval is involved.

Fixes #58307

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D135738

(cherry picked from commit 86e57e66da9380eaa90a9d0830d7f2c5fe87af99)
---
 .../InstCombine/InstCombineCalls.cpp          |  4 +++
 llvm/test/Transforms/InstCombine/byval.ll     |  3 +-
 .../test/Transforms/InstCombine/cast-byval.ll | 31 +++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/cast-byval.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bc01d2ef7fe2..52596b30494f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3289,6 +3289,10 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
       return false;
 
+    if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
+        Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
+      return false; // Cannot transform to or from byval.
+
     // If the parameter is passed as a byval argument, then we have to have a
     // sized type and the sized type has to have the same size as the old type.
     if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
diff --git a/llvm/test/Transforms/InstCombine/byval.ll b/llvm/test/Transforms/InstCombine/byval.ll
index e62bbe21c806..45750869524b 100644
--- a/llvm/test/Transforms/InstCombine/byval.ll
+++ b/llvm/test/Transforms/InstCombine/byval.ll
@@ -7,8 +7,7 @@ declare void @add_byval_callee_2(double* byval(double))
 
 define void @add_byval(i64* %in) {
 ; CHECK-LABEL: @add_byval(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[IN:%.*]] to double*
-; CHECK-NEXT:    call void @add_byval_callee(double* byval(double) [[TMP1]])
+; CHECK-NEXT:    call void bitcast (void (double*)* @add_byval_callee to void (i64*)*)(i64* byval(i64) [[IN:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   %tmp = bitcast void (double*)* @add_byval_callee to void (i64*)*
diff --git a/llvm/test/Transforms/InstCombine/cast-byval.ll b/llvm/test/Transforms/InstCombine/cast-byval.ll
new file mode 100644
index 000000000000..b3e3055837c2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-byval.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Check that function calls involving conversion from/to byval aren't transformed.
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+%Foo = type { i64 }
+define i64 @foo (ptr byval(%Foo) %foo) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[FOO:%.*]], align 4
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = load i64, ptr %foo, align 4
+  ret i64 %1
+}
+
+define i64 @bar(i64 %0) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @foo(i64 [[TMP0:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %2 = tail call i64 @foo(i64 %0)
+  ret i64 %2
+}
+
+define i64 @qux(ptr byval(%Foo) %qux) {
+; CHECK-LABEL: @qux(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @bar(ptr nonnull byval([[FOO:%.*]]) [[QUX:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = tail call i64 @bar(ptr byval(%Foo) %qux)
+  ret i64 %1
+}
-- 
Gitee


From 5388da13992c133de7ff510705564ae2d4b1a264 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Mon, 24 Oct 2022 13:45:36 +0200
Subject: [PATCH 04/41] Bump version to 15.0.4

---
 libcxx/include/__config                                     | 2 +-
 llvm/CMakeLists.txt                                         | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni                    | 2 +-
 llvm/utils/lit/lit/__init__.py                              | 2 +-
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel          | 6 +++---
 .../clang/include/clang/Config/config.h                     | 2 +-
 utils/bazel/llvm-project-overlay/lld/BUILD.bazel            | 2 +-
 .../llvm/include/llvm/Config/llvm-config.h                  | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 589b5c3b2241..810189c94a94 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -36,7 +36,7 @@
 
 #ifdef __cplusplus
 
-#  define _LIBCPP_VERSION 15003
+#  define _LIBCPP_VERSION 15004
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 1a45f2eff013..3d6f5e6f9d3d 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 3)
+  set(LLVM_VERSION_PATCH 4)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 6d64da0180df..3b890e00bec3 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 15
 llvm_version_minor = 0
-llvm_version_patch = 3
+llvm_version_patch = 4
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 5cb7ccdc67da..04f6e94535e4 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (15, 0, 3)
+__versioninfo__ = (15, 0, 4)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 6cb8889bf50f..96b462717be9 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -358,11 +358,11 @@ genrule(
     name = "basic_version_gen",
     outs = ["include/clang/Basic/Version.inc"],
     cmd = (
-        "echo '#define CLANG_VERSION 15.0.3' >> $@\n" +
+        "echo '#define CLANG_VERSION 15.0.4' >> $@\n" +
         "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" +
         "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" +
-        "echo '#define CLANG_VERSION_PATCHLEVEL 3' >> $@\n" +
-        "echo '#define CLANG_VERSION_STRING \"15.0.3\"' >> $@\n"
+        "echo '#define CLANG_VERSION_PATCHLEVEL 4' >> $@\n" +
+        "echo '#define CLANG_VERSION_STRING \"15.0.4\"' >> $@\n"
     ),
 )
 
diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
index cedeb0a2c951..5d157492eae3 100644
--- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
@@ -93,7 +93,7 @@
 /* CLANG_HAVE_RLIMITS defined conditionally below */
 
 /* The LLVM product name and version */
-#define BACKEND_PACKAGE_STRING "LLVM 15.0.3"
+#define BACKEND_PACKAGE_STRING "LLVM 15.0.4"
 
 /* Linker version detected at compile time. */
 /* #undef HOST_LINK_VERSION */
diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
index 139298d1533f..e3a8c98ffa22 100644
--- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
@@ -13,7 +13,7 @@ package(
 genrule(
     name = "config_version_gen",
     outs = ["include/lld/Common/Version.inc"],
-    cmd = "echo '#define LLD_VERSION_STRING \"15.0.3\"' > $@",
+    cmd = "echo '#define LLD_VERSION_STRING \"15.0.4\"' > $@",
 )
 
 genrule(
diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
index 37bb4cf22a89..7a86202e8e0d 100644
--- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
+++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
@@ -80,10 +80,10 @@
 #define LLVM_VERSION_MINOR 0
 
 /* Patch version of the LLVM API */
-#define LLVM_VERSION_PATCH 3
+#define LLVM_VERSION_PATCH 4
 
 /* LLVM version string */
-#define LLVM_VERSION_STRING "15.0.3"
+#define LLVM_VERSION_STRING "15.0.4"
 
 /* Whether LLVM records statistics for use with GetStatistics(),
  * PrintStatistics() or PrintStatisticsJSON()
-- 
Gitee


From 5834fe66318b55a54024fd93f69ab801a4c61d33 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Oct 2022 11:05:53 +0200
Subject: [PATCH 05/41] [AutoUpgrade] Fix remangling when upgrading struct
 return type

This was remangling the old function rather than the new one, and
could result in failures when we were performing both a struct
return upgrade and an opaque pointer upgrade.

(cherry picked from commit c8938809d155682ef5eec170897b8c26b8cbf3ea)
---
 llvm/lib/IR/AutoUpgrade.cpp                   |  2 +-
 .../opaque-ptr-intrinsic-remangling.ll        | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 75594f90c926..b9962da1d302 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1040,7 +1040,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                                Name, F->getParent());
 
       // The new function may also need remangling.
-      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
         NewFn = *Result;
       return true;
     }
diff --git a/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll b/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll
index c885897f3155..04638b0ddac9 100644
--- a/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll
+++ b/llvm/test/Assembler/opaque-ptr-intrinsic-remangling.ll
@@ -3,6 +3,8 @@
 
 ; Make sure that opaque pointer intrinsic remangling upgrade works.
 
+%int8x16x2_t = type { <16 x i8>, <16 x i8> }
+
 declare i32* @fake_personality_function()
 declare void @func()
 
@@ -43,5 +45,22 @@ define i8* @test_ptr_annotation(i8* %p) {
   ret i8* %p2
 }
 
+
+define void @test_struct_return(%int8x16x2_t* %res.p, i8* %a) {
+; CHECK-LABEL: @test_struct_return(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue [[INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertvalue [[INT8X16X2_T]] [[TMP3]], <16 x i8> [[TMP4]], 1
+; CHECK-NEXT:    store [[INT8X16X2_T]] [[TMP5]], ptr [[RES_P:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %res = call %int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
+  store %int8x16x2_t %res, %int8x16x2_t* %res.p
+  ret void
+}
+
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
 declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
+declare %int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
-- 
Gitee


From dccd0613025a6284e2a2296a56099b00a9dadcfe Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 21 Oct 2022 09:43:25 -0700
Subject: [PATCH 06/41] [ELF] Suppress "duplicate symbol" when resolving
 STB_WEAK and STB_GNU_UNIQUE in different COMDATs

```
template <typename T> struct A {
  A() {}
  int value = 0;
};

template <typename Value> struct B {
  static A<int> a;
};

template <typename Value> A<int> B<Value>::a;

inline int foo() {
  return B<int>::a.value;
}
```

```
clang++ -c -fno-pic a.cc -o weak.o
g++ -c -fno-pic a.cc -o unique.o  # --enable-gnu-unique-object

# Duplicate symbol error. In postParse, we do not check `sym.binding`
ld.lld -e 0 weak.o unique.o
```

Mixing GCC and Clang object files in this case is not ideal. .bss._ZGVN1BIiE1aE
has different COMDAT groups. It appears to work in practice because the guard
variable prevents harm due to double initialization.

For the linker, we just stick with the rule that a weak binding does not cause
"duplicate symbol" errors.

Close https://github.com/llvm/llvm-project/issues/58232

Differential Revision: https://reviews.llvm.org/D136381

(cherry picked from commit 0051b6bb78772b0658f28e5f31ddf91c1589aab5)
---
 lld/ELF/InputFiles.cpp         |  2 +-
 lld/test/ELF/comdat-binding2.s | 42 ++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/ELF/comdat-binding2.s

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 927dc272b532..473809b05e9c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1157,7 +1157,7 @@ template <class ELFT> void ObjFile<ELFT>::postParse() {
       continue;
     }
 
-    if (binding == STB_WEAK)
+    if (sym.binding == STB_WEAK || binding == STB_WEAK)
       continue;
     std::lock_guard<std::mutex> lock(mu);
     ctx->duplicates.push_back({&sym, this, sec, eSym.st_value});
diff --git a/lld/test/ELF/comdat-binding2.s b/lld/test/ELF/comdat-binding2.s
new file mode 100644
index 000000000000..3ffd7252836c
--- /dev/null
+++ b/lld/test/ELF/comdat-binding2.s
@@ -0,0 +1,42 @@
+# REQUIRES: x86
+## Test we don't report duplicate definition errors when mixing Clang STB_WEAK
+## and GCC STB_GNU_UNIQUE symbols.
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 weak.s -o weak.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 unique.s -o unique.o
+# RUN: ld.lld weak.o unique.o -o weak
+# RUN: llvm-readelf -s weak | FileCheck %s --check-prefix=WEAK
+# RUN: ld.lld unique.o weak.o -o unique
+# RUN: llvm-readelf -s unique | FileCheck %s --check-prefix=UNIQUE
+
+# WEAK:   OBJECT  WEAK   DEFAULT [[#]] _ZN1BIiE1aE
+# UNIQUE: OBJECT  UNIQUE DEFAULT [[#]] _ZN1BIiE1aE
+
+#--- weak.s
+## Clang
+	.type	_ZN1BIiE1aE,@object
+	.section	.bss._ZN1BIiE1aE,"aGwR",@nobits,_ZN1BIiE1aE,comdat
+	.weak	_ZN1BIiE1aE
+_ZN1BIiE1aE:
+	.zero	4
+
+	.type	_ZGVN1BIiE1aE,@object
+	.section	.bss._ZGVN1BIiE1aE,"aGw",@nobits,_ZN1BIiE1aE,comdat
+	.weak	_ZGVN1BIiE1aE
+_ZGVN1BIiE1aE:
+	.quad	0
+
+#--- unique.s
+## GCC -fgnu-unique. Note the different group signature for the second group.
+	.weak	_ZN1BIiE1aE
+	.section	.bss._ZN1BIiE1aE,"awG",@nobits,_ZN1BIiE1aE,comdat
+	.type	_ZN1BIiE1aE, @gnu_unique_object
+_ZN1BIiE1aE:
+	.zero	4
+
+	.weak	_ZGVN1BIiE1aE
+	.section	.bss._ZGVN1BIiE1aE,"awG",@nobits,_ZGVN1BIiE1aE,comdat
+	.type	_ZGVN1BIiE1aE, @gnu_unique_object
+_ZGVN1BIiE1aE:
+	.zero	8
-- 
Gitee


From 1e1c5204c25914951fbda55af5593056586e6b6f Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 24 Oct 2022 14:48:32 -0700
Subject: [PATCH 07/41] [SystemZ] Relase notes for LLVM 15

Unfortunately these notes were compiled until now, but these are the release notes for SystemZ.

(Did not find anything under clang)

@tstellar Should I push this patch onto release/15.x once approved, or will you apply it?

Differential Revision: https://reviews.llvm.org/D134430
---
 llvm/docs/ReleaseNotes.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 9324d26cbdd8..0660bc134652 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -227,6 +227,20 @@ Changes to the WebAssembly Backend
 
 * ...
 
+Changes to the SystemZ Backend
+------------------------------
+
+* Support z16 processor name.
+* Machine scheduler description for z16.
+* Add support for inline assembly address operands ("p") as well as for SystemZ
+  specific address operands ("ZQ", "ZR", "ZS" and "ZT").
+* Efficient handling of small memcpy/memset operations up to 32 bytes.
+* Tuning of the inliner.
+* Fixing emission of library calls so that narrow integer arguments are sign or
+  zero extended per the SystemZ ABI.
+* Support added for libunwind.
+* Various minor improvements and bugfixes.
+
 Changes to the X86 Backend
 --------------------------
 
-- 
Gitee


From 2d5c43ad484482cd23794b8b056a9b13ee920369 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Aug 2022 17:53:34 -0700
Subject: [PATCH 08/41] [lldb]  Automatically unwrap parameter packs in
 template argument accessors

When looking at template arguments in LLDB, we usually care about what
the user passed in his code, not whether some of those arguments where
passed as a variadic parameter pack.

This patch extends all the C++ APIs to look at template parameters to
take an additional 'expand_pack' boolean that automatically unwraps the
potential argument packs. The equivalent SBAPI calls have been changed
to pass true for this parameter.

A byproduct of the patch is to also fix the support for template type
that have only a parameter pack as argument (like the OnlyPack type in
the test). Those were not recognized as template instanciations before.

The added test verifies that the SBAPI is able to iterate over the
arguments of a variadic template.

The original patch was written by Fred Riss almost 4 years ago.

Differential revision: https://reviews.llvm.org/D51387

(cherry picked from commit b706f56133a77f9d7c55270ac24ff59e6fce3fa4)
---
 lldb/include/lldb/API/SBType.h                |  2 +
 lldb/include/lldb/Symbol/CompilerType.h       | 24 ++++--
 lldb/include/lldb/Symbol/TypeSystem.h         | 14 ++--
 lldb/source/API/SBType.cpp                    | 12 ++-
 .../TypeSystem/Clang/TypeSystemClang.cpp      | 81 ++++++++++++++-----
 .../TypeSystem/Clang/TypeSystemClang.h        | 19 ++---
 lldb/source/Symbol/CompilerType.cpp           | 18 +++--
 lldb/source/Symbol/TypeSystem.cpp             | 12 +--
 .../TestTemplatePackArgs.py                   | 38 +++++++++
 .../class-template-parameter-pack/main.cpp    |  8 +-
 lldb/unittests/Symbol/TestTypeSystemClang.cpp | 30 ++++---
 11 files changed, 190 insertions(+), 68 deletions(-)
 create mode 100644 lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py

diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h
index 244d328b51f4..aa45aeeec476 100644
--- a/lldb/include/lldb/API/SBType.h
+++ b/lldb/include/lldb/API/SBType.h
@@ -182,6 +182,8 @@ public:
 
   lldb::SBType GetTemplateArgumentType(uint32_t idx);
 
+  /// Return the TemplateArgumentKind of the template argument at index idx.
+  /// Variadic argument packs are automatically expanded.
   lldb::TemplateArgumentKind GetTemplateArgumentKind(uint32_t idx);
 
   lldb::SBType GetFunctionReturnType();
diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index 0ad05a27570e..aefd19d0a859 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -338,14 +338,28 @@ public:
   GetIndexOfChildMemberWithName(const char *name, bool omit_empty_base_classes,
                                 std::vector<uint32_t> &child_indexes) const;
 
-  size_t GetNumTemplateArguments() const;
-
-  lldb::TemplateArgumentKind GetTemplateArgumentKind(size_t idx) const;
-  CompilerType GetTypeTemplateArgument(size_t idx) const;
+  /// Return the number of template arguments the type has.
+  /// If expand_pack is true, then variadic argument packs are automatically
+  /// expanded to their supplied arguments. If it is false an argument pack
+  /// will only count as 1 argument.
+  size_t GetNumTemplateArguments(bool expand_pack = false) const;
+
+  // Return the TemplateArgumentKind of the template argument at index idx.
+  // If expand_pack is true, then variadic argument packs are automatically
+  // expanded to their supplied arguments. With expand_pack set to false, an
+  // arguement pack will count as 1 argument and return a type of Pack.
+  lldb::TemplateArgumentKind
+  GetTemplateArgumentKind(size_t idx, bool expand_pack = false) const;
+  CompilerType GetTypeTemplateArgument(size_t idx,
+                                       bool expand_pack = false) const;
 
   /// Returns the value of the template argument and its type.
+  /// If expand_pack is true, then variadic argument packs are automatically
+  /// expanded to their supplied arguments. With expand_pack set to false, an
+  /// arguement pack will count as 1 argument and it is invalid to call this
+  /// method on the pack argument.
   llvm::Optional<IntegralTemplateArgument>
-  GetIntegralTemplateArgument(size_t idx) const;
+  GetIntegralTemplateArgument(size_t idx, bool expand_pack = false) const;
 
   CompilerType GetTypeForFormatters() const;
 
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index be5783596897..769449a4933b 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -346,14 +346,18 @@ public:
                                 const char *name, bool omit_empty_base_classes,
                                 std::vector<uint32_t> &child_indexes) = 0;
 
-  virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type);
+  virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+                                         bool expand_pack);
 
   virtual lldb::TemplateArgumentKind
-  GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx);
-  virtual CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
-                                           size_t idx);
+  GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+                          bool expand_pack);
+  virtual CompilerType
+  GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+                          bool expand_pack);
   virtual llvm::Optional<CompilerType::IntegralTemplateArgument>
-  GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx);
+  GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+                              bool expand_pack);
 
   // Dumping types
 
diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp
index 533930c0544b..adc60a084367 100644
--- a/lldb/source/API/SBType.cpp
+++ b/lldb/source/API/SBType.cpp
@@ -542,7 +542,8 @@ uint32_t SBType::GetNumberOfTemplateArguments() {
   LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
-    return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments();
+    return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(
+        /*expand_pack=*/true);
   return 0;
 }
 
@@ -553,13 +554,15 @@ lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) {
     return SBType();
 
   CompilerType type;
+  const bool expand_pack = true;
   switch(GetTemplateArgumentKind(idx)) {
     case eTemplateArgumentKindType:
-      type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(idx);
+      type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(
+          idx, expand_pack);
       break;
     case eTemplateArgumentKindIntegral:
       type = m_opaque_sp->GetCompilerType(false)
-                 .GetIntegralTemplateArgument(idx)
+                 .GetIntegralTemplateArgument(idx, expand_pack)
                  ->type;
       break;
     default:
@@ -574,7 +577,8 @@ lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) {
   LLDB_INSTRUMENT_VA(this, idx);
 
   if (IsValid())
-    return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx);
+    return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(
+        idx, /*expand_pack=*/true);
   return eTemplateArgumentKindNull;
 }
 
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index c6eb693bba6b..a1ebe5830bb9 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -7096,7 +7096,8 @@ TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
 }
 
 size_t
-TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+                                         bool expand_pack) {
   if (!type)
     return 0;
 
@@ -7111,8 +7112,17 @@ TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
         const clang::ClassTemplateSpecializationDecl *template_decl =
             llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
                 cxx_record_decl);
-        if (template_decl)
-          return template_decl->getTemplateArgs().size();
+        if (template_decl) {
+          const auto &template_arg_list = template_decl->getTemplateArgs();
+          size_t num_args = template_arg_list.size();
+          assert(num_args && "template specialization without any args");
+          if (expand_pack && num_args) {
+            const auto &pack = template_arg_list[num_args - 1];
+            if (pack.getKind() == clang::TemplateArgument::Pack)
+              num_args += pack.pack_size() - 1;
+          }
+          return num_args;
+        }
       }
     }
     break;
@@ -7149,15 +7159,51 @@ TypeSystemClang::GetAsTemplateSpecialization(
   }
 }
 
+const TemplateArgument *
+GetNthTemplateArgument(const clang::ClassTemplateSpecializationDecl *decl,
+                       size_t idx, bool expand_pack) {
+  const auto &args = decl->getTemplateArgs();
+  const size_t args_size = args.size();
+
+  assert(args_size && "template specialization without any args");
+  if (!args_size)
+    return nullptr;
+
+  const size_t last_idx = args_size - 1;
+
+  // We're asked for a template argument that can't be a parameter pack, so
+  // return it without worrying about 'expand_pack'.
+  if (idx < last_idx)
+    return &args[idx];
+
+  // We're asked for the last template argument but we don't want/need to
+  // expand it.
+  if (!expand_pack || args[last_idx].getKind() != clang::TemplateArgument::Pack)
+    return idx >= args.size() ? nullptr : &args[idx];
+
+  // Index into the expanded pack.
+  // Note that 'idx' counts from the beginning of all template arguments
+  // (including the ones preceding the parameter pack).
+  const auto &pack = args[last_idx];
+  const size_t pack_idx = idx - last_idx;
+  const size_t pack_size = pack.pack_size();
+  assert(pack_idx < pack_size && "parameter pack index out-of-bounds");
+  return &pack.pack_elements()[pack_idx];
+}
+
 lldb::TemplateArgumentKind
 TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
-                                         size_t arg_idx) {
+                                         size_t arg_idx, bool expand_pack) {
   const clang::ClassTemplateSpecializationDecl *template_decl =
       GetAsTemplateSpecialization(type);
-  if (! template_decl || arg_idx >= template_decl->getTemplateArgs().size())
+  if (!template_decl)
+    return eTemplateArgumentKindNull;
+
+  const auto *arg = GetNthTemplateArgument(template_decl, arg_idx, expand_pack);
+  if (!arg)
     return eTemplateArgumentKindNull;
 
-  switch (template_decl->getTemplateArgs()[arg_idx].getKind()) {
+  switch (arg->getKind()) {
   case clang::TemplateArgument::Null:
     return eTemplateArgumentKindNull;
 
@@ -7190,35 +7236,32 @@ TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
 
 CompilerType
 TypeSystemClang::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
-                                         size_t idx) {
+                                         size_t idx, bool expand_pack) {
   const clang::ClassTemplateSpecializationDecl *template_decl =
       GetAsTemplateSpecialization(type);
-  if (!template_decl || idx >= template_decl->getTemplateArgs().size())
+  if (!template_decl)
     return CompilerType();
 
-  const clang::TemplateArgument &template_arg =
-      template_decl->getTemplateArgs()[idx];
-  if (template_arg.getKind() != clang::TemplateArgument::Type)
+  const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+  if (!arg || arg->getKind() != clang::TemplateArgument::Type)
     return CompilerType();
 
-  return GetType(template_arg.getAsType());
+  return GetType(arg->getAsType());
 }
 
 Optional<CompilerType::IntegralTemplateArgument>
 TypeSystemClang::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
-                                             size_t idx) {
+                                             size_t idx, bool expand_pack) {
   const clang::ClassTemplateSpecializationDecl *template_decl =
       GetAsTemplateSpecialization(type);
-  if (! template_decl || idx >= template_decl->getTemplateArgs().size())
+  if (!template_decl)
     return llvm::None;
 
-  const clang::TemplateArgument &template_arg =
-      template_decl->getTemplateArgs()[idx];
-  if (template_arg.getKind() != clang::TemplateArgument::Integral)
+  const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+  if (!arg || arg->getKind() != clang::TemplateArgument::Integral)
     return llvm::None;
 
-  return {
-      {template_arg.getAsIntegral(), GetType(template_arg.getIntegralType())}};
+  return {{arg->getAsIntegral(), GetType(arg->getIntegralType())}};
 }
 
 CompilerType TypeSystemClang::GetTypeForFormatters(void *type) {
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 24dbb71c8f4d..7f25a6df548f 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -91,7 +91,7 @@ public:
   void SetOwningModule(OptionalClangModuleID id);
   /// \}
 };
-  
+
 /// A TypeSystem implementation based on Clang.
 ///
 /// This class uses a single clang::ASTContext as the backend for storing
@@ -334,7 +334,7 @@ public:
 
     llvm::SmallVector<const char *, 2> names;
     llvm::SmallVector<clang::TemplateArgument, 2> args;
-    
+
     const char * pack_name = nullptr;
     std::unique_ptr<TemplateParameterInfos> packed_args;
   };
@@ -537,7 +537,7 @@ public:
 #ifndef NDEBUG
   bool Verify(lldb::opaque_compiler_type_t type) override;
 #endif
-  
+
   bool IsArrayType(lldb::opaque_compiler_type_t type,
                    CompilerType *element_type, uint64_t *size,
                    bool *is_incomplete) override;
@@ -810,16 +810,17 @@ public:
                                 const char *name, bool omit_empty_base_classes,
                                 std::vector<uint32_t> &child_indexes) override;
 
-  size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override;
+  size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+                                 bool expand_pack) override;
 
   lldb::TemplateArgumentKind
-  GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
-                          size_t idx) override;
+  GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+                          bool expand_pack) override;
   CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
-                                       size_t idx) override;
+                                       size_t idx, bool expand_pack) override;
   llvm::Optional<CompilerType::IntegralTemplateArgument>
-  GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
-                              size_t idx) override;
+  GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+                              bool expand_pack) override;
 
   CompilerType GetTypeForFormatters(void *type) override;
 
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index ac98352c235e..bef456583687 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -659,30 +659,32 @@ size_t CompilerType::GetIndexOfChildMemberWithName(
   return 0;
 }
 
-size_t CompilerType::GetNumTemplateArguments() const {
+size_t CompilerType::GetNumTemplateArguments(bool expand_pack) const {
   if (IsValid()) {
-    return m_type_system->GetNumTemplateArguments(m_type);
+    return m_type_system->GetNumTemplateArguments(m_type, expand_pack);
   }
   return 0;
 }
 
-TemplateArgumentKind CompilerType::GetTemplateArgumentKind(size_t idx) const {
+TemplateArgumentKind
+CompilerType::GetTemplateArgumentKind(size_t idx, bool expand_pack) const {
   if (IsValid())
-    return m_type_system->GetTemplateArgumentKind(m_type, idx);
+    return m_type_system->GetTemplateArgumentKind(m_type, idx, expand_pack);
   return eTemplateArgumentKindNull;
 }
 
-CompilerType CompilerType::GetTypeTemplateArgument(size_t idx) const {
+CompilerType CompilerType::GetTypeTemplateArgument(size_t idx,
+                                                   bool expand_pack) const {
   if (IsValid()) {
-    return m_type_system->GetTypeTemplateArgument(m_type, idx);
+    return m_type_system->GetTypeTemplateArgument(m_type, idx, expand_pack);
   }
   return CompilerType();
 }
 
 llvm::Optional<CompilerType::IntegralTemplateArgument>
-CompilerType::GetIntegralTemplateArgument(size_t idx) const {
+CompilerType::GetIntegralTemplateArgument(size_t idx, bool expand_pack) const {
   if (IsValid())
-    return m_type_system->GetIntegralTemplateArgument(m_type, idx);
+    return m_type_system->GetIntegralTemplateArgument(m_type, idx, expand_pack);
   return llvm::None;
 }
 
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 3092dc0bf0a4..412373533aab 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -118,23 +118,25 @@ CompilerType TypeSystem::GetTypeForFormatters(void *type) {
   return CompilerType(this, type);
 }
 
-size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+                                           bool expand_pack) {
   return 0;
 }
 
 TemplateArgumentKind
-TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx) {
+TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx,
+                                    bool expand_pack) {
   return eTemplateArgumentKindNull;
 }
 
 CompilerType TypeSystem::GetTypeTemplateArgument(opaque_compiler_type_t type,
-                                                 size_t idx) {
+                                                 size_t idx, bool expand_pack) {
   return CompilerType();
 }
 
 llvm::Optional<CompilerType::IntegralTemplateArgument>
-TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type,
-                                        size_t idx) {
+TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, size_t idx,
+                                        bool expand_pack) {
   return llvm::None;
 }
 
diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py
new file mode 100644
index 000000000000..180d3f503c4f
--- /dev/null
+++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py
@@ -0,0 +1,38 @@
+"""
+Test that the type of arguments to C++ template classes that have variadic
+parameters can be enumerated.
+"""
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TemplatePackArgsTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    def test_template_argument_pack(self):
+        self.build()
+        (_, _, thread, _) = lldbutil.run_to_source_breakpoint(self,
+          'breakpoint here', lldb.SBFileSpec('main.cpp'), exe_name = 'a.out')
+        frame = thread.GetSelectedFrame()
+
+        empty_pack = frame.FindVariable('emptyPack')
+        self.assertTrue(empty_pack.IsValid(),
+                        'make sure we find the emptyPack variable')
+
+        only_pack = frame.FindVariable('onlyPack')
+        self.assertTrue(only_pack.IsValid(),
+                        'make sure we find the onlyPack variable')
+        self.assertEqual(only_pack.GetType().GetNumberOfTemplateArguments(), 4)
+        self.assertEqual(only_pack.GetType().GetTemplateArgumentType(0).GetName(), 'int')
+        self.assertEqual(only_pack.GetType().GetTemplateArgumentType(1).GetName(), 'char')
+        self.assertEqual(only_pack.GetType().GetTemplateArgumentType(2).GetName(), 'double')
+        # Access the C<double, 42> template parameter.
+        nested_template = only_pack.GetType().GetTemplateArgumentType(3)
+        self.assertEqual(nested_template.GetName(), 'D<int, int, bool>')
+        self.assertEqual(nested_template.GetNumberOfTemplateArguments(), 3)
+        self.assertEqual(nested_template.GetTemplateArgumentType(0).GetName(), 'int')
+        self.assertEqual(nested_template.GetTemplateArgumentType(1).GetName(), 'int')
+        self.assertEqual(nested_template.GetTemplateArgumentType(2).GetName(), 'bool')
diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp
index 8bb0a42b58a3..26f8eab545c8 100644
--- a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp
+++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp
@@ -26,7 +26,13 @@ template <> struct D<int, int, bool> : D<int, int> {
   bool argsAre_Int_bool() { return true; }
 };
 
+template <typename... Args> struct OnlyPack {};
+template <typename T, typename... Args> struct EmptyPack {};
+
 int main(int argc, char const *argv[]) {
+  EmptyPack<int> emptyPack;
+  OnlyPack<int, char, double, D<int, int, bool>> onlyPack;
+
   C<int, 16, 32> myC;
   C<int, 16> myLesserC;
   myC.member = 64;
@@ -34,7 +40,7 @@ int main(int argc, char const *argv[]) {
   (void)C<int, 16>().argsAre_16_32();
   (void)(myC.member != 64);
   D<int, int, bool> myD;
-  D<int, int> myLesserD;
+  D<int, int> myLesserD; // breakpoint here
   myD.member = 64;
   (void)D<int, int, bool>().argsAre_Int_bool();
   (void)D<int, int>().argsAre_Int_bool();
diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp
index e78a084f59f0..4da17cf3610c 100644
--- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp
+++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp
@@ -500,18 +500,24 @@ TEST_F(TestTypeSystemClang, TemplateArguments) {
   for (CompilerType t : {type, typedef_type, auto_type}) {
     SCOPED_TRACE(t.GetTypeName().AsCString());
 
-    EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0),
-              eTemplateArgumentKindType);
-    EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0),
-              int_type);
-    EXPECT_EQ(llvm::None,
-              m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 0));
-
-    EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1),
-              eTemplateArgumentKindIntegral);
-    EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1),
-              CompilerType());
-    auto result = m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1);
+    const bool expand_pack = false;
+    EXPECT_EQ(
+        m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0, expand_pack),
+        eTemplateArgumentKindType);
+    EXPECT_EQ(
+        m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0, expand_pack),
+        int_type);
+    EXPECT_EQ(llvm::None, m_ast->GetIntegralTemplateArgument(
+                              t.GetOpaqueQualType(), 0, expand_pack));
+
+    EXPECT_EQ(
+        m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1, expand_pack),
+        eTemplateArgumentKindIntegral);
+    EXPECT_EQ(
+        m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1, expand_pack),
+        CompilerType());
+    auto result = m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1,
+                                                     expand_pack);
     ASSERT_NE(llvm::None, result);
     EXPECT_EQ(arg, result->value);
     EXPECT_EQ(int_type, result->type);
-- 
Gitee


From db68723804fd30d5e7da1fb2ad2aab409ef58d29 Mon Sep 17 00:00:00 2001
From: Tom Praschan <13141438+tom-anders@users.noreply.github.com>
Date: Sun, 18 Sep 2022 18:48:11 +0200
Subject: [PATCH 09/41] [clangd] Return earlier when snippet is empty

Fixes github.com/clangd/clangd/issues/1216

If the Snippet string is empty, Snippet.front() would trigger a crash.
Move the Snippet->empty() check up a few lines to avoid this. Should not
break any existing behavior.

Differential Revision: https://reviews.llvm.org/D134137

(cherry picked from commit 60528c690a4c334d2a3a2c22eb97af9e67d7a91d)
---
 clang-tools-extra/clangd/CodeComplete.cpp       |  5 +++--
 .../clangd/unittests/CodeCompleteTests.cpp      | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index edbb1722ae38..55982f41250f 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -486,6 +486,9 @@ private:
       // we need to complete 'forward<$1>($0)'.
       return "($0)";
 
+    if (Snippet->empty())
+      return "";
+
     bool MayHaveArgList = Completion.Kind == CompletionItemKind::Function ||
                           Completion.Kind == CompletionItemKind::Method ||
                           Completion.Kind == CompletionItemKind::Constructor ||
@@ -524,8 +527,6 @@ private:
       return *Snippet;
 
     // Replace argument snippets with a simplified pattern.
-    if (Snippet->empty())
-      return "";
     if (MayHaveArgList) {
       // Functions snippets can be of 2 types:
       // - containing only function arguments, e.g.
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 5050ab203b8d..079a4ec70623 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -1014,6 +1014,23 @@ TEST(CodeCompleteTest, NoColonColonAtTheEnd) {
   EXPECT_THAT(Results.Completions, Not(Contains(labeled("clang::"))));
 }
 
+TEST(CompletionTests, EmptySnippetDoesNotCrash) {
+    // See https://github.com/clangd/clangd/issues/1216
+    auto Results = completions(R"cpp(
+        int main() {
+          auto w = [&](auto &&f) { return f(f); };
+          auto f = w([&](auto &&f) {
+            return [&](auto &&n) {
+              if (n == 0) {
+                return 1;
+              }
+              return n * ^(f)(n - 1);
+            };
+          })(10);
+        }
+    )cpp");
+}
+
 TEST(CompletionTest, BacktrackCrashes) {
   // Sema calls code completion callbacks twice in these cases.
   auto Results = completions(R"cpp(
-- 
Gitee


From 3010b7e000006f680b6bf3141be988291fa1da41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Fri, 21 Oct 2022 11:36:37 +0200
Subject: [PATCH 10/41] [clang][driver] Remove dynamic gcc-toolset/devtoolset
 logic

This breaks when the newest available devtoolset directory is not a
complete toolset: https://github.com/llvm/llvm-project/issues/57843

Remove this again in favor or just adding the two new directories for
devtoolset/gcc-toolset 12.

This reverts commit 35aaf548237a4f213ba9d95de53b33c5ce1eadce.
This reverts commit 9f97720268911abae2ad9d90e270358db234a1c1.

Fixes https://github.com/llvm/llvm-project/issues/57843

Differential Revision: https://reviews.llvm.org/D136435
---
 clang/lib/Driver/ToolChains/Gnu.cpp      | 40 ++++-------
 clang/unittests/Driver/ToolChainTest.cpp | 92 ------------------------
 2 files changed, 15 insertions(+), 117 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index f203cae1d329..665cdc3132fb 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2139,31 +2139,21 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
   // and gcc-toolsets.
   if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux &&
       D.getVFS().exists("/opt/rh")) {
-    // Find the directory in /opt/rh/ starting with gcc-toolset-* or
-    // devtoolset-* with the highest version number and add that
-    // one to our prefixes.
-    std::string ChosenToolsetDir;
-    unsigned ChosenToolsetVersion = 0;
-    std::error_code EC;
-    for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC),
-                                       LE;
-         !EC && LI != LE; LI = LI.increment(EC)) {
-      StringRef ToolsetDir = llvm::sys::path::filename(LI->path());
-      unsigned ToolsetVersion;
-      if ((!ToolsetDir.startswith("gcc-toolset-") &&
-           !ToolsetDir.startswith("devtoolset-")) ||
-          ToolsetDir.substr(ToolsetDir.rfind('-') + 1)
-              .getAsInteger(10, ToolsetVersion))
-        continue;
-
-      if (ToolsetVersion > ChosenToolsetVersion) {
-        ChosenToolsetVersion = ToolsetVersion;
-        ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str();
-      }
-    }
-
-    if (ChosenToolsetVersion > 0)
-      Prefixes.push_back(ChosenToolsetDir + "/root/usr");
+    // TODO: We may want to remove this, since the functionality
+    //   can be achieved using config files.
+    Prefixes.push_back("/opt/rh/gcc-toolset-12/root/usr");
+    Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr");
+    Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-12/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-11/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-10/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-9/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-8/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-7/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-6/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
+    Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
   }
 
   // Fall back to /usr which is used by most non-Solaris systems.
diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp
index 64bc616523f0..c434dfcb3e86 100644
--- a/clang/unittests/Driver/ToolChainTest.cpp
+++ b/clang/unittests/Driver/ToolChainTest.cpp
@@ -18,7 +18,6 @@
 #include "clang/Driver/Driver.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -570,95 +569,4 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
   Diags.Clear();
   DiagConsumer->clear();
 }
-
-TEST(ToolChainTest, Toolsets) {
-  // Ignore this test on Windows hosts.
-  llvm::Triple Host(llvm::sys::getProcessTriple());
-  if (Host.isOSWindows())
-    GTEST_SKIP();
-
-  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  // Check (newer) GCC toolset installation.
-  {
-    IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-        new llvm::vfs::InMemoryFileSystem);
-
-    // These should be ignored.
-    InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-2", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/gcc-toolset--1", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-
-    // File needed for GCC installation detection.
-    InMemoryFileSystem->addFile("/opt/rh/gcc-toolset-12/root/usr/lib/gcc/"
-                                "x86_64-redhat-linux/11/crtbegin.o",
-                                0, llvm::MemoryBuffer::getMemBuffer("\n"));
-
-    DiagnosticsEngine Diags(DiagID, &*DiagOpts, new SimpleDiagnosticConsumer);
-    Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags,
-                     "clang LLVM compiler", InMemoryFileSystem);
-    std::unique_ptr<Compilation> C(
-        TheDriver.BuildCompilation({"clang", "--gcc-toolchain="}));
-    ASSERT_TRUE(C);
-    std::string S;
-    {
-      llvm::raw_string_ostream OS(S);
-      C->getDefaultToolChain().printVerboseInfo(OS);
-    }
-    EXPECT_EQ("Found candidate GCC installation: "
-              "/opt/rh/gcc-toolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n"
-              "Selected GCC installation: "
-              "/opt/rh/gcc-toolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n"
-              "Candidate multilib: .;@m64\n"
-              "Selected multilib: .;@m64\n",
-              S);
-  }
-
-  // And older devtoolset.
-  {
-    IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-        new llvm::vfs::InMemoryFileSystem);
-
-    // These should be ignored.
-    InMemoryFileSystem->addFile("/opt/rh/devtoolset-2", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/devtoolset-", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/devtoolset--", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-    InMemoryFileSystem->addFile("/opt/rh/devtoolset--1", 0,
-                                llvm::MemoryBuffer::getMemBuffer("\n"));
-
-    // File needed for GCC installation detection.
-    InMemoryFileSystem->addFile("/opt/rh/devtoolset-12/root/usr/lib/gcc/"
-                                "x86_64-redhat-linux/11/crtbegin.o",
-                                0, llvm::MemoryBuffer::getMemBuffer("\n"));
-
-    DiagnosticsEngine Diags(DiagID, &*DiagOpts, new SimpleDiagnosticConsumer);
-    Driver TheDriver("/bin/clang", "x86_64-redhat-linux", Diags,
-                     "clang LLVM compiler", InMemoryFileSystem);
-    std::unique_ptr<Compilation> C(
-        TheDriver.BuildCompilation({"clang", "--gcc-toolchain="}));
-    ASSERT_TRUE(C);
-    std::string S;
-    {
-      llvm::raw_string_ostream OS(S);
-      C->getDefaultToolChain().printVerboseInfo(OS);
-    }
-    EXPECT_EQ("Found candidate GCC installation: "
-              "/opt/rh/devtoolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n"
-              "Selected GCC installation: "
-              "/opt/rh/devtoolset-12/root/usr/lib/gcc/x86_64-redhat-linux/11\n"
-              "Candidate multilib: .;@m64\n"
-              "Selected multilib: .;@m64\n",
-              S);
-  }
-}
-
 } // end anonymous namespace.
-- 
Gitee


From dd711a9391226189476f23f793fb98e244d52a4b Mon Sep 17 00:00:00 2001
From: Jez Ng <jezng@fb.com>
Date: Tue, 11 Oct 2022 23:50:46 -0400
Subject: [PATCH 11/41] [lld-macho] Canonicalize personality pointers in EH
 frames

We already do this for personality pointers referenced from compact
unwind entries; this patch extends that behavior to personalities
referenced via EH frames as well.

This reduces the number of distinct personalities we need in the final
binary, and helps us avoid hitting the "too many personalities" error.

I renamed `UnwindInfoSection::prepareRelocations()` to simply `prepare`
since we now do some non-reloc-specific stuff within.

Fixes #58277.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D135728

(cherry picked from commit 7b45dfc6811a52ff4e9a6054dc276d70d77fddaf)
---
 lld/MachO/UnwindInfoSection.cpp             | 35 ++++++++++++++---
 lld/MachO/UnwindInfoSection.h               |  2 +-
 lld/MachO/Writer.cpp                        |  2 +-
 lld/test/MachO/eh-frame-personality-dedup.s | 43 +++++++++++++++++++++
 4 files changed, 75 insertions(+), 7 deletions(-)
 create mode 100644 lld/test/MachO/eh-frame-personality-dedup.s

diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index ca6cbdfbb8bb..8f267251b7c0 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -158,7 +158,7 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection {
 public:
   UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {}
   uint64_t getSize() const override { return unwindInfoSize; }
-  void prepareRelocations() override;
+  void prepare() override;
   void finalize() override;
   void writeTo(uint8_t *buf) const override;
 
@@ -166,6 +166,7 @@ private:
   void prepareRelocations(ConcatInputSection *);
   void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
   void encodePersonalities();
+  Symbol *canonicalizePersonality(Symbol *);
 
   uint64_t unwindInfoSize = 0;
   std::vector<decltype(symbols)::value_type> symbolsVec;
@@ -218,14 +219,24 @@ void UnwindInfoSection::addSymbol(const Defined *d) {
   }
 }
 
-void UnwindInfoSectionImpl::prepareRelocations() {
+void UnwindInfoSectionImpl::prepare() {
   // This iteration needs to be deterministic, since prepareRelocations may add
   // entries to the GOT. Hence the use of a MapVector for
   // UnwindInfoSection::symbols.
   for (const Defined *d : make_second_range(symbols))
-    if (d->unwindEntry &&
-        d->unwindEntry->getName() == section_names::compactUnwind)
-      prepareRelocations(d->unwindEntry);
+    if (d->unwindEntry) {
+      if (d->unwindEntry->getName() == section_names::compactUnwind) {
+        prepareRelocations(d->unwindEntry);
+      } else {
+        // We don't have to add entries to the GOT here because FDEs have
+        // explicit GOT relocations, so Writer::scanRelocations() will add those
+        // GOT entries. However, we still need to canonicalize the personality
+        // pointers (like prepareRelocations() does for CU entries) in order
+        // to avoid overflowing the 3-personality limit.
+        FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+        fde.personality = canonicalizePersonality(fde.personality);
+      }
+    }
 }
 
 // Compact unwind relocations have different semantics, so we handle them in a
@@ -279,6 +290,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
           continue;
       }
 
+      // Similar to canonicalizePersonality(), but we also register a GOT entry.
       if (auto *defined = dyn_cast<Defined>(s)) {
         // Check if we have created a synthetic symbol at the same address.
         Symbol *&personality =
@@ -291,6 +303,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
         }
         continue;
       }
+
       assert(isa<DylibSymbol>(s));
       in.got->addEntry(s);
       continue;
@@ -320,6 +333,18 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
   }
 }
 
+Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
+  if (auto *defined = dyn_cast_or_null<Defined>(personality)) {
+    // Check if we have created a synthetic symbol at the same address.
+    Symbol *&synth = personalityTable[{defined->isec, defined->value}];
+    if (synth == nullptr)
+      synth = defined;
+    else if (synth != defined)
+      return synth;
+  }
+  return personality;
+}
+
 // We need to apply the relocations to the pre-link compact unwind section
 // before converting it to post-link form. There should only be absolute
 // relocations here: since we are not emitting the pre-link CU section, there
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index c6b334731c75..f2bc3213a127 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -24,7 +24,7 @@ public:
   // section entirely.
   bool isNeeded() const override { return !allEntriesAreOmitted; }
   void addSymbol(const Defined *);
-  virtual void prepareRelocations() = 0;
+  virtual void prepare() = 0;
 
 protected:
   UnwindInfoSection();
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 3c44a60f4be2..ce9672dd0b4f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -675,7 +675,7 @@ void Writer::scanRelocations() {
     }
   }
 
-  in.unwindInfo->prepareRelocations();
+  in.unwindInfo->prepare();
 }
 
 void Writer::scanSymbols() {
diff --git a/lld/test/MachO/eh-frame-personality-dedup.s b/lld/test/MachO/eh-frame-personality-dedup.s
new file mode 100644
index 000000000000..b14ddb23465d
--- /dev/null
+++ b/lld/test/MachO/eh-frame-personality-dedup.s
@@ -0,0 +1,43 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/eh-frame.s -o %t/eh-frame.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/cu.s -o %t/cu.o
+# RUN: %lld -dylib %t/cu.o %t/eh-frame.o -o %t/out
+
+## Sanity check: we want our input to contain a section (and not symbol)
+## relocation for the personality reference.
+# RUN: llvm-readobj --relocations %t/cu.o | FileCheck %s --check-prefix=SECT-RELOC
+# SECT-RELOC:      Section __compact_unwind {
+# SECT-RELOC-NEXT:   __text
+# SECT-RELOC-NEXT:   __text
+# SECT-RELOC-NEXT: }
+
+## Verify that the personality referenced via a symbol reloc in eh-frame.s gets
+## dedup'ed with the personality referenced via a section reloc in cu.s.
+# RUN: llvm-objdump --macho --unwind-info %t/out | FileCheck %s
+# CHECK: Personality functions: (count = 1)
+
+#--- eh-frame.s
+_fun:
+  .cfi_startproc
+  .cfi_personality 155, _my_personality
+  ## cfi_escape cannot be encoded in compact unwind
+  .cfi_escape 0
+  ret
+  .cfi_endproc
+
+.subsections_via_symbols
+
+#--- cu.s
+.globl _my_personality
+_fun:
+  .cfi_startproc
+  .cfi_personality 155, _my_personality
+  .cfi_def_cfa_offset 16
+  ret
+  .cfi_endproc
+
+_my_personality:
+  nop
+
+.subsections_via_symbols
-- 
Gitee


From 9d46557baa84ab4342707c119cf9c77c9148230e Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet@google.com>
Date: Wed, 26 Oct 2022 09:34:34 +0000
Subject: [PATCH 12/41] Take memset_inline into account in
 analyzeLoadFromClobberingMemInst

This appeared in https://reviews.llvm.org/D126903#3884061

Differential Revision: https://reviews.llvm.org/D136752

(cherry picked from commit 1a726cfa83667585dd87a9955ed5e331cad45d18)
---
 llvm/lib/Transforms/Utils/VNCoercion.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 42be67f3cfc0..264da2187754 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -356,9 +356,9 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
 
   // If this is memset, we just need to see if the offset is valid in the size
   // of the memset..
-  if (MI->getIntrinsicID() == Intrinsic::memset) {
+  if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
     if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-      auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+      auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
       if (!CI || !CI->isZero())
         return -1;
     }
-- 
Gitee


From 08bd84e8a6358eb412fcef279f8875e2d69a3374 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan@protonmail.com>
Date: Tue, 18 Oct 2022 00:01:55 +0000
Subject: [PATCH 13/41] [SPARC] Make calls to function with big return values
 work

Implement CanLowerReturn and associated CallingConv changes for SPARC/SPARC64.

In particular, for SPARC64 there's new `RetCC_Sparc64_*` functions that handles the return case of the calling convention.
It uses the same analysis as `CC_Sparc64_*` family of funtions, but fails if the return value doesn't fit into the return registers.

This makes calls to functions with big return values converted to an sret function as expected, instead of crashing LLVM.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D132465

(cherry picked from commit d3fcbee10d893b9e01e563c3840414ba89283484)
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   1 +
 llvm/lib/Target/Sparc/SparcCallingConv.td     |  10 +-
 llvm/lib/Target/Sparc/SparcISelLowering.cpp   |  61 ++++-
 llvm/lib/Target/Sparc/SparcISelLowering.h     |   5 +
 llvm/test/CodeGen/SPARC/64abi.ll              |  27 --
 llvm/test/CodeGen/SPARC/bigreturn.ll          | 254 ++++++++++++++++++
 6 files changed, 322 insertions(+), 36 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPARC/bigreturn.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 35650b9bd00e..ecdaef0442da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9693,6 +9693,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     Entry.Alignment = Alignment;
     CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
     CLI.NumFixedArgs += 1;
+    CLI.getArgs()[0].IndirectType = CLI.RetTy;
     CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
 
     // sret demotion isn't compatible with tail-calls, since the sret argument
diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index e6d23f741ea5..8afd0a7fc09a 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -125,10 +125,14 @@ def CC_Sparc64 : CallingConv<[
 def RetCC_Sparc64 : CallingConv<[
   // A single f32 return value always goes in %f0. The ABI doesn't specify what
   // happens to multiple f32 return values outside a struct.
-  CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>,
+  CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>,
 
-  // Otherwise, return values are passed exactly like arguments.
-  CCDelegateTo<CC_Sparc64>
+  // Otherwise, return values are passed exactly like arguments, except that
+  // returns that are too big to fit into the registers is passed as an sret
+  // instead.
+  CCIfInReg<CCIfType<[i32, f32], CCCustom<"RetCC_Sparc64_Half">>>,
+  CCIfType<[i32], CCPromoteToType<i64>>,
+  CCCustom<"RetCC_Sparc64_Full">
 ]>;
 
 // Callee-saved registers are handled by the register window mechanism.
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2cb74e7709c7..f55675089102 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -101,9 +101,9 @@ static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
 }
 
 // Allocate a full-sized argument for the 64-bit ABI.
-static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
-                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
-                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+                                    MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
   assert((LocVT == MVT::f32 || LocVT == MVT::f128
           || LocVT.getSizeInBits() == 64) &&
          "Can't handle non-64 bits locations");
@@ -133,6 +133,11 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
     return true;
   }
 
+  // Bail out if this is a return CC and we run out of registers to place
+  // values into.
+  if (IsReturn)
+    return false;
+
   // This argument goes on the stack in an 8-byte slot.
   // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
   // the right-aligned float. The first 4 bytes of the stack slot are undefined.
@@ -146,9 +151,9 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
 // Allocate a half-sized argument for the 64-bit ABI.
 //
 // This is used when passing { float, int } structs by value in registers.
-static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
-                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
-                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+                                    MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
   assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
   unsigned Offset = State.AllocateStack(4, Align(4));
 
@@ -174,10 +179,43 @@ static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
     return true;
   }
 
+  // Bail out if this is a return CC and we run out of registers to place
+  // values into.
+  if (IsReturn)
+    return false;
+
   State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
   return true;
 }
 
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                            CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                 State);
+}
+
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                            CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                 State);
+}
+
+static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                 State);
+}
+
+static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                 State);
+}
+
 #include "SparcGenCallingConv.inc"
 
 // The calling conventions in SparcCallingConv.td are described in terms of the
@@ -191,6 +229,15 @@ static unsigned toCallerWindow(unsigned Reg) {
   return Reg;
 }
 
+bool SparcTargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64
+                                                       : RetCC_Sparc32);
+}
+
 SDValue
 SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                  bool IsVarArg,
@@ -1031,6 +1078,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    assert(RVLocs[i].isRegLoc() && "Can only return in registers!");
     if (RVLocs[i].getLocVT() == MVT::v2i32) {
       SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32);
       SDValue Lo = DAG.getCopyFromReg(
@@ -1346,6 +1394,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
     unsigned Reg = toCallerWindow(VA.getLocReg());
 
     // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 2768bb20566a..16e4f2687054 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -144,6 +144,11 @@ namespace llvm {
     SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
                          SmallVectorImpl<SDValue> &InVals) const;
 
+    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        LLVMContext &Context) const override;
+
     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         const SmallVectorImpl<SDValue> &OutVals,
diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll
index 6b181d8b3432..27865f718151 100644
--- a/llvm/test/CodeGen/SPARC/64abi.ll
+++ b/llvm/test/CodeGen/SPARC/64abi.ll
@@ -293,33 +293,6 @@ define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
   ret void
 }
 
-; Structs up to 32 bytes in size can be returned in registers.
-; CHECK-LABEL: ret_i64_pair:
-; CHECK: ldx [%i2], %i0
-; CHECK: ldx [%i3], %i1
-define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
-  %r1 = load i64, i64* %p
-  %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0
-  store i64 0, i64* %p
-  %r2 = load i64, i64* %q
-  %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
-  ret { i64, i64 } %rv2
-}
-
-; CHECK-LABEL: call_ret_i64_pair:
-; CHECK: call ret_i64_pair
-; CHECK: stx %o0, [%i0]
-; CHECK: stx %o1, [%i0]
-define void @call_ret_i64_pair(i64* %i0) {
-  %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
-                                        i64* undef, i64* undef)
-  %e0 = extractvalue { i64, i64 } %rv, 0
-  store volatile i64 %e0, i64* %i0
-  %e1 = extractvalue { i64, i64 } %rv, 1
-  store i64 %e1, i64* %i0
-  ret void
-}
-
 ; This is not a C struct, the i32 member uses 8 bytes, but the float only 4.
 ; CHECK-LABEL: ret_i32_float_pair:
 ; CHECK: ld [%i2], %i0
diff --git a/llvm/test/CodeGen/SPARC/bigreturn.ll b/llvm/test/CodeGen/SPARC/bigreturn.ll
new file mode 100644
index 000000000000..25b4eeecadc0
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/bigreturn.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC %s
+; RUN: llc < %s -mtriple=sparc64 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck --check-prefix=SPARC64 %s
+
+;; Structs up to six registers in size can be returned in registers.
+;; Note that the maximum return size and member placement is NOT
+;; compatible with the C ABI - see SparcCallingConv.td.
+define { i32, i32 } @ret_i32_pair(i32 %a0, i32 %a1, i32* %p, i32* %q) {
+; SPARC-LABEL: ret_i32_pair:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    ld [%i2], %i0
+; SPARC-NEXT:    st %g0, [%i2]
+; SPARC-NEXT:    ld [%i3], %i1
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: ret_i32_pair:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -128, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    ld [%i2], %i0
+; SPARC64-NEXT:    st %g0, [%i2]
+; SPARC64-NEXT:    ld [%i3], %i1
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %r1 = load i32, i32* %p
+  %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 0
+  store i32 0, i32* %p
+  %r2 = load i32, i32* %q
+  %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 1
+  ret { i32, i32 } %rv2
+}
+
+define void @call_ret_i32_pair(i32* %i0) {
+; SPARC-LABEL: call_ret_i32_pair:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    call ret_i32_pair
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    st %o0, [%i0]
+; SPARC-NEXT:    st %o1, [%i0]
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: call_ret_i32_pair:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -176, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    call ret_i32_pair
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    st %o0, [%i0]
+; SPARC64-NEXT:    st %o1, [%i0]
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %rv = call { i32, i32 } @ret_i32_pair(i32 undef, i32 undef,
+                                        i32* undef, i32* undef)
+  %e0 = extractvalue { i32, i32 } %rv, 0
+  store volatile i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, i32 } %rv, 1
+  store i32 %e1, i32* %i0
+  ret void
+}
+
+;; Functions returning structs more than six registers' worth of space
+;; should be automatically treated as an sret function.
+declare { [16 x i32] } @ret_i32_arr(i32 %input)
+
+define i32 @call_ret_i32_arr(i32 %0) {
+; SPARC-LABEL: call_ret_i32_arr:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -160, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    add %fp, -64, %i1
+; SPARC-NEXT:    st %i1, [%sp+64]
+; SPARC-NEXT:    mov %i0, %o0
+; SPARC-NEXT:    call ret_i32_arr
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    unimp 64
+; SPARC-NEXT:    ld [%fp+-4], %i0
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: call_ret_i32_arr:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -240, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    add %fp, 1983, %o0
+; SPARC64-NEXT:    mov %i0, %o1
+; SPARC64-NEXT:    call ret_i32_arr
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    ld [%fp+2043], %i0
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %2 = call { [16 x i32] } @ret_i32_arr(i32 %0)
+  %3 = extractvalue { [16 x i32] } %2, 0
+  %4 = extractvalue [16 x i32] %3, 15
+  ret i32 %4
+}
+
+;; Structs up to six registers in size can be returned in registers.
+;; Note that the maximum return size and member placement is NOT
+;; compatible with the C ABI - see SparcCallingConv.td.
+define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
+; SPARC-LABEL: ret_i64_pair:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    mov %g0, %i4
+; SPARC-NEXT:    ldd [%i2], %i0
+; SPARC-NEXT:    mov %i4, %i5
+; SPARC-NEXT:    std %i4, [%i2]
+; SPARC-NEXT:    ldd [%i3], %i2
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: ret_i64_pair:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -128, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    ldx [%i2], %i0
+; SPARC64-NEXT:    stx %g0, [%i2]
+; SPARC64-NEXT:    ldx [%i3], %i1
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %r1 = load i64, i64* %p
+  %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0
+  store i64 0, i64* %p
+  %r2 = load i64, i64* %q
+  %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
+  ret { i64, i64 } %rv2
+}
+
+define void @call_ret_i64_pair(i64* %i0) {
+; SPARC-LABEL: call_ret_i64_pair:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    call ret_i64_pair
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1
+; SPARC-NEXT:    ! kill: def $o2 killed $o2 killed $o2_o3 def $o2_o3
+; SPARC-NEXT:    ! kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1
+; SPARC-NEXT:    std %o0, [%i0]
+; SPARC-NEXT:    ! kill: def $o3 killed $o3 killed $o2_o3 def $o2_o3
+; SPARC-NEXT:    std %o2, [%i0]
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: call_ret_i64_pair:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -176, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    call ret_i64_pair
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    stx %o0, [%i0]
+; SPARC64-NEXT:    stx %o1, [%i0]
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
+                                        i64* undef, i64* undef)
+  %e0 = extractvalue { i64, i64 } %rv, 0
+  store volatile i64 %e0, i64* %i0
+  %e1 = extractvalue { i64, i64 } %rv, 1
+  store i64 %e1, i64* %i0
+  ret void
+}
+
+;; Functions returning structs more than six registers' worth of space
+;; should be automatically treated as an sret function.
+declare { [16 x i64] } @ret_i64_arr(i64 %input)
+
+define i64 @call_ret_i64_arr(i64 %0) {
+; SPARC-LABEL: call_ret_i64_arr:
+; SPARC:         .cfi_startproc
+; SPARC-NEXT:  ! %bb.0:
+; SPARC-NEXT:    save %sp, -224, %sp
+; SPARC-NEXT:    .cfi_def_cfa_register %fp
+; SPARC-NEXT:    .cfi_window_save
+; SPARC-NEXT:    .cfi_register %o7, %i7
+; SPARC-NEXT:    add %fp, -128, %i2
+; SPARC-NEXT:    st %i2, [%sp+64]
+; SPARC-NEXT:    mov %i0, %o0
+; SPARC-NEXT:    mov %i1, %o1
+; SPARC-NEXT:    call ret_i64_arr
+; SPARC-NEXT:    nop
+; SPARC-NEXT:    unimp 128
+; SPARC-NEXT:    ldd [%fp+-8], %i0
+; SPARC-NEXT:    restore
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    nop
+;
+; SPARC64-LABEL: call_ret_i64_arr:
+; SPARC64:         .cfi_startproc
+; SPARC64-NEXT:  ! %bb.0:
+; SPARC64-NEXT:    save %sp, -304, %sp
+; SPARC64-NEXT:    .cfi_def_cfa_register %fp
+; SPARC64-NEXT:    .cfi_window_save
+; SPARC64-NEXT:    .cfi_register %o7, %i7
+; SPARC64-NEXT:    add %fp, 1919, %o0
+; SPARC64-NEXT:    mov %i0, %o1
+; SPARC64-NEXT:    call ret_i64_arr
+; SPARC64-NEXT:    nop
+; SPARC64-NEXT:    ldx [%fp+2039], %i0
+; SPARC64-NEXT:    restore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %2 = call { [16 x i64] } @ret_i64_arr(i64 %0)
+  %3 = extractvalue { [16 x i64] } %2, 0
+  %4 = extractvalue [16 x i64] %3, 15
+  ret i64 %4
+}
-- 
Gitee


From 80a9fc840b1b0c5bdd6509578283af3b02782d48 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 26 Oct 2022 16:15:02 -0700
Subject: [PATCH 14/41] [clang][Sema] Fix a clang crash with btf_type_tag

For the following program,
  $ cat t.c
  struct t {
   int (__attribute__((btf_type_tag("rcu"))) *f)();
   int a;
  };
  int foo(struct t *arg) {
    return arg->a;
  }
Compiling with 'clang -g -O2 -S t.c' will cause a failure like below:
  clang: /home/yhs/work/llvm-project/clang/lib/Sema/SemaType.cpp:6391: void {anonymous}::DeclaratorLocFiller::VisitParenTypeLoc(clang::ParenTypeLoc):
         Assertion `Chunk.Kind == DeclaratorChunk::Paren' failed.
  PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
  Stack dump:
  ......
  #5 0x00007f89e4280ea5 abort (/lib64/libc.so.6+0x21ea5)
  #6 0x00007f89e4280d79 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d79)
  #7 0x00007f89e42a6456 (/lib64/libc.so.6+0x47456)
  #8 0x00000000045c2596 GetTypeSourceInfoForDeclarator((anonymous namespace)::TypeProcessingState&, clang::QualType, clang::TypeSourceInfo*) SemaType.cpp:0:0
  #9 0x00000000045ccfa5 GetFullTypeForDeclarator((anonymous namespace)::TypeProcessingState&, clang::QualType, clang::TypeSourceInfo*) SemaType.cpp:0:0
  ......

The reason of the failure is due to the mismatch of TypeLoc and D.getTypeObject().Kind. For example,
the TypeLoc is
  BTFTagAttributedType 0x88614e0 'int  btf_type_tag(rcu)()' sugar
  |-ParenType 0x8861480 'int ()' sugar
  | `-FunctionNoProtoType 0x8861450 'int ()' cdecl
  |   `-BuiltinType 0x87fd500 'int'
while corresponding D.getTypeObject().Kind points to DeclaratorChunk::Paren, and
this will cause later assertion.

To fix the issue, similar to AttributedTypeLoc, let us skip BTFTagAttributedTypeLoc in
GetTypeSourceInfoForDeclarator().

Differential Revision: https://reviews.llvm.org/D136807
---
 clang/docs/ReleaseNotes.rst                     |  2 ++
 clang/lib/Sema/SemaType.cpp                     |  3 +++
 clang/test/CodeGen/attr-btf_type_tag-func-ptr.c | 15 +++++++++++++++
 3 files changed, 20 insertions(+)
 create mode 100644 clang/test/CodeGen/attr-btf_type_tag-func-ptr.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a17b033f5eb3..a1ee8f0459ae 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -225,6 +225,8 @@ Bug Fixes
 - Fix a crash when generating code coverage information for an
   ``if consteval`` statement. This fixes
   `Issue 57377 <https://github.com/llvm/llvm-project/issues/57377>`_.
+- Fix a crash when a ``btf_type_tag`` attribute is applied to the pointee of
+  a function pointer.
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 3ab5d26a9a75..edcac4d2ee9a 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -6443,6 +6443,9 @@ GetTypeSourceInfoForDeclarator(TypeProcessingState &State,
       CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
     }
 
+    while (BTFTagAttributedTypeLoc TL = CurrTL.getAs<BTFTagAttributedTypeLoc>())
+      CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
+
     while (DependentAddressSpaceTypeLoc TL =
                CurrTL.getAs<DependentAddressSpaceTypeLoc>()) {
       fillDependentAddressSpaceTypeLoc(TL, D.getTypeObject(i).getAttrs());
diff --git a/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c b/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c
new file mode 100644
index 000000000000..29ca5f58e4b8
--- /dev/null
+++ b/clang/test/CodeGen/attr-btf_type_tag-func-ptr.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm -o - %s | FileCheck %s
+
+struct t {
+ int (__attribute__((btf_type_tag("rcu"))) *f)();
+ int a;
+};
+int foo(struct t *arg) {
+  return arg->a;
+}
+
+// CHECK:      !DIDerivedType(tag: DW_TAG_member, name: "f"
+// CHECK-SAME: baseType: ![[L18:[0-9]+]]
+// CHECK:      ![[L18]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[#]], size: [[#]], annotations: ![[L21:[0-9]+]])
+// CHECK:      ![[L21]] = !{![[L22:[0-9]+]]}
+// CHECK:      ![[L22]] = !{!"btf_type_tag", !"rcu"}
-- 
Gitee


From 5c68a1cb123161b54b72ce90e7975d95a8eaf2a4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 26 Sep 2022 23:07:49 -0400
Subject: [PATCH 15/41] AMDGPU: Make various vector undefs legal

Surprisingly these were getting legalized to something
zero initialized.

This fixes an infinite loop when combining some vector types.
Also fixes zero initializing some undef values.

SimplifyDemandedVectorElts / SimplifyDemandedBits are not checking
for the legality of the output undefs they are replacing unused
operations with. This resulted in turning vectors into undefs
that were later re-legalized back into zero vectors.

(cherry picked from commit 7a84624079a2656c684bed6100708544500c5a32)
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   2 +
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll    |  16 --
 .../AMDGPU/cross-block-use-is-not-abi-copy.ll |   4 +-
 .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll     |  10 +-
 .../CodeGen/AMDGPU/extract-subvector-16bit.ll |  78 +------
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll |  14 +-
 llvm/test/CodeGen/AMDGPU/select-undef.ll      | 219 +++++++++++++++++-
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll      | 113 ++-------
 llvm/test/CodeGen/AMDGPU/v1024.ll             |   2 +
 .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll   | 144 ++----------
 llvm/test/CodeGen/AMDGPU/wqm.ll               | 162 ++-----------
 11 files changed, 306 insertions(+), 458 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f7d139adc63b..f6b7d1ffc6d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -249,6 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
       case ISD::STORE:
       case ISD::BUILD_VECTOR:
       case ISD::BITCAST:
+      case ISD::UNDEF:
       case ISD::EXTRACT_VECTOR_ELT:
       case ISD::INSERT_VECTOR_ELT:
       case ISD::EXTRACT_SUBVECTOR:
@@ -516,6 +517,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
         case ISD::STORE:
         case ISD::BUILD_VECTOR:
         case ISD::BITCAST:
+        case ISD::UNDEF:
         case ISD::EXTRACT_VECTOR_ELT:
         case ISD::INSERT_VECTOR_ELT:
         case ISD::INSERT_SUBVECTOR:
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 8df85ba872bf..3697946cb5c3 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -5,14 +5,6 @@
 define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; SI-LABEL: main:
 ; SI:       ; %bb.0: ; %bb
-; SI-NEXT:    s_mov_b32 s0, 0
-; SI-NEXT:    s_mov_b32 s1, s0
-; SI-NEXT:    s_mov_b32 s2, s0
-; SI-NEXT:    s_mov_b32 s3, s0
-; SI-NEXT:    s_mov_b32 s4, s0
-; SI-NEXT:    s_mov_b32 s5, s0
-; SI-NEXT:    s_mov_b32 s6, s0
-; SI-NEXT:    s_mov_b32 s7, s0
 ; SI-NEXT:    image_load v2, v0, s[0:7] dmask:0x1 unorm
 ; SI-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; SI-NEXT:    v_and_b32_e32 v0, 7, v0
@@ -26,14 +18,6 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ;
 ; VI-LABEL: main:
 ; VI:       ; %bb.0: ; %bb
-; VI-NEXT:    s_mov_b32 s0, 0
-; VI-NEXT:    s_mov_b32 s1, s0
-; VI-NEXT:    s_mov_b32 s2, s0
-; VI-NEXT:    s_mov_b32 s3, s0
-; VI-NEXT:    s_mov_b32 s4, s0
-; VI-NEXT:    s_mov_b32 s5, s0
-; VI-NEXT:    s_mov_b32 s6, s0
-; VI-NEXT:    s_mov_b32 s7, s0
 ; VI-NEXT:    image_load v2, v0, s[0:7] dmask:0x1 unorm
 ; VI-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; VI-NEXT:    v_and_b32_e32 v0, 7, v0
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 29fc098899ee..5d985850446c 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -213,7 +213,7 @@ if.else:                                          ; preds = %entry
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %call6.sink = phi <3 x i16> [ %call6, %if.else ], [ undef, %if.then ]
+  %call6.sink = phi <3 x i16> [ %call6, %if.else ], [ zeroinitializer, %if.then ]
   store <3 x i16> %call6.sink, <3 x i16> addrspace(1)* undef
   ret void
 }
@@ -266,7 +266,7 @@ if.else:                                          ; preds = %entry
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %call6.sink = phi <3 x half> [ %call6, %if.else ], [ undef, %if.then ]
+  %call6.sink = phi <3 x half> [ %call6, %if.else ], [ zeroinitializer, %if.then ]
   store <3 x half> %call6.sink, <3 x half> addrspace(1)* undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 8af7575f03d0..0b629efffbb3 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -4,16 +4,8 @@
 define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GCN-LABEL: _amdgpu_ps_main:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    s_mov_b32 s0, 0
-; GCN-NEXT:    v_mov_b32_e32 v4, 0
-; GCN-NEXT:    s_mov_b32 s1, s0
-; GCN-NEXT:    s_mov_b32 s2, s0
-; GCN-NEXT:    s_mov_b32 s3, s0
-; GCN-NEXT:    s_mov_b32 s4, s0
-; GCN-NEXT:    s_mov_b32 s5, s0
-; GCN-NEXT:    s_mov_b32 s6, s0
-; GCN-NEXT:    s_mov_b32 s7, s0
 ; GCN-NEXT:    image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_clause 0x1
 ; GCN-NEXT:    image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index 6456c87a31fb..cbfd8ec5cb16 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -100,14 +100,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16(<8 x i16> addrspace(1) * %p0, <8 x i16
 ; GFX9-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX9-NEXT:    s_branch .LBB0_4
 ; GFX9-NEXT:  .LBB0_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT:  .LBB0_3: ; %T
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -244,14 +237,7 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(<8 x i16> addrspace(1) * %p0, <8 x i
 ; GFX9-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX9-NEXT:    s_branch .LBB1_4
 ; GFX9-NEXT:  .LBB1_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT:  .LBB1_3: ; %T
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -386,14 +372,7 @@ define <4 x half> @vec_8xf16_extract_4xf16(<8 x half> addrspace(1) * %p0, <8 x h
 ; GFX9-NEXT:    s_cbranch_execz .LBB2_3
 ; GFX9-NEXT:    s_branch .LBB2_4
 ; GFX9-NEXT:  .LBB2_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT:  .LBB2_3: ; %T
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -567,22 +546,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16(<16 x i16> addrspace(1) * %p0, <16 x
 ; GFX9-NEXT:    s_cbranch_execz .LBB3_3
 ; GFX9-NEXT:    s_branch .LBB3_4
 ; GFX9-NEXT:  .LBB3_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s8
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s8
-; GFX9-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-NEXT:    v_mov_b32_e32 v7, s11
-; GFX9-NEXT:    v_mov_b32_e32 v8, s12
-; GFX9-NEXT:    v_mov_b32_e32 v9, s13
-; GFX9-NEXT:    v_mov_b32_e32 v10, s14
-; GFX9-NEXT:    v_mov_b32_e32 v11, s15
+; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
 ; GFX9-NEXT:  .LBB3_3: ; %T
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
@@ -759,22 +723,7 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(<16 x i16> addrspace(1) * %p0, <16
 ; GFX9-NEXT:    s_cbranch_execz .LBB4_3
 ; GFX9-NEXT:    s_branch .LBB4_4
 ; GFX9-NEXT:  .LBB4_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s8
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s8
-; GFX9-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-NEXT:    v_mov_b32_e32 v7, s11
-; GFX9-NEXT:    v_mov_b32_e32 v8, s12
-; GFX9-NEXT:    v_mov_b32_e32 v9, s13
-; GFX9-NEXT:    v_mov_b32_e32 v10, s14
-; GFX9-NEXT:    v_mov_b32_e32 v11, s15
+; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
 ; GFX9-NEXT:  .LBB4_3: ; %T
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
@@ -949,22 +898,7 @@ define <4 x half> @vec_16xf16_extract_4xf16(<16 x half> addrspace(1) * %p0, <16
 ; GFX9-NEXT:    s_cbranch_execz .LBB5_3
 ; GFX9-NEXT:    s_branch .LBB5_4
 ; GFX9-NEXT:  .LBB5_2:
-; GFX9-NEXT:    s_mov_b32 s8, 0
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_mov_b32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s11, s8
-; GFX9-NEXT:    s_mov_b32 s12, s8
-; GFX9-NEXT:    s_mov_b32 s13, s8
-; GFX9-NEXT:    s_mov_b32 s14, s8
-; GFX9-NEXT:    s_mov_b32 s15, s8
-; GFX9-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-NEXT:    v_mov_b32_e32 v7, s11
-; GFX9-NEXT:    v_mov_b32_e32 v8, s12
-; GFX9-NEXT:    v_mov_b32_e32 v9, s13
-; GFX9-NEXT:    v_mov_b32_e32 v10, s14
-; GFX9-NEXT:    v_mov_b32_e32 v11, s15
+; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
 ; GFX9-NEXT:  .LBB5_3: ; %T
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index cc4ece6c7059..f742d2c0bda4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -374,18 +374,10 @@ define <4 x float> @insertelement_to_sgpr() nounwind {
 ; GCN-LABEL: insertelement_to_sgpr:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s12, 0
-; GCN-NEXT:    s_mov_b32 s4, s12
-; GCN-NEXT:    s_mov_b32 s5, s12
-; GCN-NEXT:    s_mov_b32 s6, s12
-; GCN-NEXT:    s_mov_b32 s7, s12
-; GCN-NEXT:    s_mov_b32 s8, s12
-; GCN-NEXT:    s_mov_b32 s9, s12
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s12
-; GCN-NEXT:    image_gather4_lz v[0:3], v[0:1], s[4:11], s[12:15] dmask:0x1
+; GCN-NEXT:    s_mov_b32 s4, 0
+; GCN-NEXT:    image_gather4_lz v[0:3], v[0:1], s[4:11], s[4:7] dmask:0x1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %tmp = load <4 x i32>, <4 x i32> addrspace(4)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll
index 6597d6784e0c..f02cd3fc5e4e 100644
--- a/llvm/test/CodeGen/AMDGPU/select-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}select_undef_lhs:
 ; GCN: s_waitcnt
@@ -43,3 +43,220 @@ define void @select_undef_n2(float addrspace(1)* %a, i32 %c) {
 }
 
 declare float @llvm.amdgcn.rcp.f32(float)
+
+
+; Make sure the vector undef isn't lowered into 0s.
+; GCN-LABEL: {{^}}undef_v6f32:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v6f32(<6 x float> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <6 x float> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <6 x float>, <6 x float> addrspace(3)* undef
+  %add = fadd <6 x float> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <6 x float> %add, <6 x float> addrspace(3)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}undef_v6i32:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v6i32(<6 x i32> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <6 x i32> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <6 x i32>, <6 x i32> addrspace(3)* undef
+  %add = add <6 x i32> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <6 x i32> %add, <6 x i32> addrspace(3)* undef
+  ret void
+}
+
+; Make sure the vector undef isn't lowered into 0s.
+; GCN-LABEL: {{^}}undef_v5f32:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v5f32(<5 x float> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <5 x float> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <5 x float>, <5 x float> addrspace(3)* undef
+  %add = fadd <5 x float> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <5 x float> %add, <5 x float> addrspace(3)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}undef_v5i32:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v5i32(<5 x i32> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <5 x i32> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <5 x i32>, <5 x i32> addrspace(3)* undef
+  %add = add <5 x i32> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <5 x i32> %add, <5 x i32> addrspace(3)* undef
+  ret void
+}
+
+; Make sure the vector undef isn't lowered into 0s.
+; GCN-LABEL: {{^}}undef_v3f64:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v3f64(<3 x double> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <3 x double> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <3 x double>, <3 x double> addrspace(3)* %ptr
+  %add = fadd <3 x double> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <3 x double> %add, <3 x double> addrspace(3)* %ptr
+  ret void
+}
+
+; GCN-LABEL: {{^}}undef_v3i64:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v3i64(<3 x i64> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <3 x i64> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <3 x i64>, <3 x i64> addrspace(3)* %ptr
+  %add = add <3 x i64> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <3 x i64> %add, <3 x i64> addrspace(3)* %ptr
+  ret void
+}
+
+; Make sure the vector undef isn't lowered into 0s.
+; GCN-LABEL: {{^}}undef_v4f16:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v4f16(<4 x half> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <4 x half> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <4 x half>, <4 x half> addrspace(3)* %ptr
+  %add = fadd <4 x half> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <4 x half> %add, <4 x half> addrspace(3)* %ptr
+  ret void
+}
+
+; GCN-LABEL: {{^}}undef_v4i16:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v4i16(<4 x i16> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <4 x i16> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <4 x i16>, <4 x i16> addrspace(3)* %ptr
+  %add = add <4 x i16> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <4 x i16> %add, <4 x i16> addrspace(3)* %ptr
+  ret void
+}
+
+; Make sure the vector undef isn't lowered into 0s.
+; GCN-LABEL: {{^}}undef_v2f16:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v2f16(<2 x half> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <2 x half> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <2 x half>, <2 x half> addrspace(3)* %ptr
+  %add = fadd <2 x half> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <2 x half> %add, <2 x half> addrspace(3)* %ptr
+  ret void
+}
+
+; GCN-LABEL: {{^}}undef_v2i16:
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_vccnz
+define amdgpu_kernel void @undef_v2i16(<2 x i16> addrspace(3)* %ptr, i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %phi = phi <2 x i16> [ undef, %entry ], [ %add, %loop ]
+  %load = load volatile <2 x i16>, <2 x i16> addrspace(3)* %ptr
+  %add = add <2 x i16> %load, %phi
+  br i1 %cond, label %loop, label %ret
+
+ret:
+  store volatile <2 x i16> %add, <2 x i16> addrspace(3)* %ptr
+  ret void
+}
+
+; We were expanding undef vectors into zero vectors. Optimizations
+; would then see we used no elements of the vector, and reform the
+; undef vector resulting in a combiner loop.
+; GCN-LABEL: {{^}}inf_loop_undef_vector:
+; GCN: s_waitcnt
+; GCN-NEXT: v_mad_u64_u32
+; GCN-NEXT: v_mul_lo_u32
+; GCN-NEXT: v_mul_lo_u32
+; GCN-NEXT: v_add3_u32
+; GCN-NEXT: global_store_dwordx2
+define void @inf_loop_undef_vector(<6 x float> %arg, float %arg1, i64 %arg2) {
+  %i = insertelement <6 x float> %arg, float %arg1, i64 2
+  %i3 = bitcast <6 x float> %i to <3 x i64>
+  %i4 = extractelement <3 x i64> %i3, i64 0
+  %i5 = extractelement <3 x i64> %i3, i64 1
+  %i6 = mul i64 %i5, %arg2
+  %i7 = add i64 %i6, %i4
+  store volatile i64 %i7, i64 addrspace(1)* undef, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index ada6c1da04e2..7080c84f7b50 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1397,28 +1397,20 @@ bb7:                                              ; preds = %bb4
 define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
 ; SI-LABEL: if_after_kill_block:
 ; SI:       ; %bb.0: ; %bb
-; SI-NEXT:    s_mov_b64 s[2:3], exec
+; SI-NEXT:    s_mov_b64 s[0:1], exec
 ; SI-NEXT:    s_wqm_b64 exec, exec
-; SI-NEXT:    s_mov_b32 s0, 0
 ; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
-; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
 ; SI-NEXT:    s_cbranch_execz .LBB13_3
 ; SI-NEXT:  ; %bb.1: ; %bb3
 ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
-; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
+; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
 ; SI-NEXT:    s_cbranch_scc0 .LBB13_6
 ; SI-NEXT:  ; %bb.2: ; %bb3
 ; SI-NEXT:    s_andn2_b64 exec, exec, vcc
 ; SI-NEXT:  .LBB13_3: ; %bb4
-; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SI-NEXT:    s_mov_b32 s1, s0
-; SI-NEXT:    s_mov_b32 s2, s0
-; SI-NEXT:    s_mov_b32 s3, s0
-; SI-NEXT:    s_mov_b32 s4, s0
-; SI-NEXT:    s_mov_b32 s5, s0
-; SI-NEXT:    s_mov_b32 s6, s0
-; SI-NEXT:    s_mov_b32 s7, s0
+; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; SI-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
@@ -1439,28 +1431,20 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
 ;
 ; GFX10-WAVE64-LABEL: if_after_kill_block:
 ; GFX10-WAVE64:       ; %bb.0: ; %bb
-; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
+; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX10-WAVE64-NEXT:    s_wqm_b64 exec, exec
 ; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
-; GFX10-WAVE64-NEXT:    s_mov_b32 s0, 0
-; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
+; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
 ; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_3
 ; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
 ; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
-; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
+; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
 ; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB13_6
 ; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
 ; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
 ; GFX10-WAVE64-NEXT:  .LBB13_3: ; %bb4
-; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX10-WAVE64-NEXT:    s_mov_b32 s1, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s2, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s3, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s4, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s0
+; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX10-WAVE64-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
 ; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE64-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
@@ -1479,28 +1463,20 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
 ;
 ; GFX10-WAVE32-LABEL: if_after_kill_block:
 ; GFX10-WAVE32:       ; %bb.0: ; %bb
-; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
+; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX10-WAVE32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 0, v1
-; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
-; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
-; GFX10-WAVE32-NEXT:    s_xor_b32 s2, exec_lo, s2
+; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
+; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_3
 ; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
 ; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
-; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
+; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
 ; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB13_6
 ; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
 ; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
 ; GFX10-WAVE32-NEXT:  .LBB13_3: ; %bb4
-; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
-; GFX10-WAVE32-NEXT:    s_mov_b32 s1, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s2, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s3, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s4, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s0
+; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX10-WAVE32-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
 ; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE32-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0, v0
@@ -1519,29 +1495,22 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
 ;
 ; GFX11-LABEL: if_after_kill_block:
 ; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    s_mov_b64 s[2:3], exec
+; GFX11-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX11-NEXT:    s_wqm_b64 exec, exec
-; GFX11-NEXT:    s_mov_b32 s0, 0
-; GFX11-NEXT:    s_mov_b64 s[4:5], exec
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX11-NEXT:    v_cmpx_nle_f32_e32 0, v1
-; GFX11-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
 ; GFX11-NEXT:    s_cbranch_execz .LBB13_3
 ; GFX11-NEXT:  ; %bb.1: ; %bb3
 ; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
-; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
+; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
 ; GFX11-NEXT:    s_cbranch_scc0 .LBB13_6
 ; GFX11-NEXT:  ; %bb.2: ; %bb3
 ; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
 ; GFX11-NEXT:  .LBB13_3: ; %bb4
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GFX11-NEXT:    s_mov_b32 s1, s0
-; GFX11-NEXT:    s_mov_b32 s2, s0
-; GFX11-NEXT:    s_mov_b32 s3, s0
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    s_mov_b32 s5, s0
-; GFX11-NEXT:    s_mov_b32 s6, s0
-; GFX11-NEXT:    s_mov_b32 s7, s0
+; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX11-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
 ; GFX11-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -1584,19 +1553,11 @@ bb9:                                              ; preds = %bb4
 define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; SI-LABEL: cbranch_kill:
 ; SI:       ; %bb.0: ; %.entry
-; SI-NEXT:    s_mov_b32 s4, 0
 ; SI-NEXT:    s_mov_b64 s[0:1], exec
 ; SI-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-NEXT:    v_mov_b32_e32 v2, v1
 ; SI-NEXT:    v_mov_b32_e32 v3, v1
-; SI-NEXT:    s_mov_b32 s5, s4
-; SI-NEXT:    s_mov_b32 s6, s4
-; SI-NEXT:    s_mov_b32 s7, s4
-; SI-NEXT:    s_mov_b32 s8, s4
-; SI-NEXT:    s_mov_b32 s9, s4
-; SI-NEXT:    s_mov_b32 s10, s4
-; SI-NEXT:    s_mov_b32 s11, s4
-; SI-NEXT:    image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da
+; SI-NEXT:    image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
 ; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
@@ -1627,16 +1588,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX10-WAVE64-LABEL: cbranch_kill:
 ; GFX10-WAVE64:       ; %bb.0: ; %.entry
 ; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-WAVE64-NEXT:    s_mov_b32 s4, 0
 ; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
-; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s8, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s9, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s10, s4
-; GFX10-WAVE64-NEXT:    s_mov_b32 s11, s4
-; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
 ; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
@@ -1667,16 +1620,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX10-WAVE32-LABEL: cbranch_kill:
 ; GFX10-WAVE32:       ; %bb.0: ; %.entry
 ; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-WAVE32-NEXT:    s_mov_b32 s4, 0
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
-; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s8, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s9, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s10, s4
-; GFX10-WAVE32-NEXT:    s_mov_b32 s11, s4
-; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
 ; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
@@ -1707,16 +1652,8 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX11-LABEL: cbranch_kill:
 ; GFX11:       ; %bb.0: ; %.entry
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
-; GFX11-NEXT:    s_mov_b32 s4, 0
 ; GFX11-NEXT:    s_mov_b64 s[0:1], exec
-; GFX11-NEXT:    s_mov_b32 s5, s4
-; GFX11-NEXT:    s_mov_b32 s6, s4
-; GFX11-NEXT:    s_mov_b32 s7, s4
-; GFX11-NEXT:    s_mov_b32 s8, s4
-; GFX11-NEXT:    s_mov_b32 s9, s4
-; GFX11-NEXT:    s_mov_b32 s10, s4
-; GFX11-NEXT:    s_mov_b32 s11, s4
-; GFX11-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX11-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_cmpx_ge_f32_e32 0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
index a5e0454a3634..1326ba437f94 100644
--- a/llvm/test/CodeGen/AMDGPU/v1024.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -10,6 +10,7 @@ define amdgpu_kernel void @test_v1024() {
 entry:
   %alloca = alloca <32 x i32>, align 16, addrspace(5)
   %cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)*
+  call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %cast, i8 0, i32 128, i1 false)
   br i1 undef, label %if.then.i.i, label %if.else.i
 
 if.then.i.i:                                      ; preds = %entry
@@ -24,6 +25,7 @@ if.then.i62.i:                                    ; preds = %if.else.i, %if.then
   ret void
 }
 
+declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture readonly, i8, i32, i1 immarg)
 declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)
 
 declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
index 5164b072a6dd..ed0de729dafd 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -14,7 +14,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    s_mov_b32 s4, 0
 ; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT:    s_mov_b32 s33, s32
 ; GFX9-NEXT:    v_mov_b32_e32 v36, v16
@@ -22,13 +21,6 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX9-NEXT:    v_mov_b32_e32 v34, v14
 ; GFX9-NEXT:    v_mov_b32_e32 v33, v13
 ; GFX9-NEXT:    v_mov_b32_e32 v32, v12
-; GFX9-NEXT:    s_mov_b32 s5, s4
-; GFX9-NEXT:    s_mov_b32 s6, s4
-; GFX9-NEXT:    s_mov_b32 s7, s4
-; GFX9-NEXT:    s_mov_b32 s8, s4
-; GFX9-NEXT:    s_mov_b32 s9, s4
-; GFX9-NEXT:    s_mov_b32 s10, s4
-; GFX9-NEXT:    s_mov_b32 s11, s4
 ; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -82,16 +74,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX10-NEXT:    v_mov_b32_e32 v34, v14
 ; GFX10-NEXT:    v_mov_b32_e32 v33, v13
 ; GFX10-NEXT:    v_mov_b32_e32 v32, v12
-; GFX10-NEXT:    s_mov_b32 s4, 0
 ; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT:    s_mov_b32 s33, s32
-; GFX10-NEXT:    s_mov_b32 s5, s4
-; GFX10-NEXT:    s_mov_b32 s6, s4
-; GFX10-NEXT:    s_mov_b32 s7, s4
-; GFX10-NEXT:    s_mov_b32 s8, s4
-; GFX10-NEXT:    s_mov_b32 s9, s4
-; GFX10-NEXT:    s_mov_b32 s10, s4
-; GFX10-NEXT:    s_mov_b32 s11, s4
 ; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -145,16 +129,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX11-NEXT:    v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15
 ; GFX11-NEXT:    v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13
 ; GFX11-NEXT:    v_mov_b32_e32 v32, v12
-; GFX11-NEXT:    s_mov_b32 s0, 0
 ; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX11-NEXT:    s_mov_b32 s33, s32
-; GFX11-NEXT:    s_mov_b32 s1, s0
-; GFX11-NEXT:    s_mov_b32 s2, s0
-; GFX11-NEXT:    s_mov_b32 s3, s0
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    s_mov_b32 s5, s0
-; GFX11-NEXT:    s_mov_b32 s6, s0
-; GFX11-NEXT:    s_mov_b32 s7, s0
 ; GFX11-NEXT:    s_clause 0x3
 ; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:12
 ; GFX11-NEXT:    scratch_store_b32 off, v42, s33 offset:8
@@ -225,65 +201,41 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    v_writelane_b32 v40, s33, 10
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    v_writelane_b32 v40, s36, 2
-; GFX9-NEXT:    v_writelane_b32 v40, s37, 3
-; GFX9-NEXT:    v_writelane_b32 v40, s38, 4
-; GFX9-NEXT:    v_writelane_b32 v40, s39, 5
-; GFX9-NEXT:    v_writelane_b32 v40, s40, 6
-; GFX9-NEXT:    v_writelane_b32 v40, s41, 7
+; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    v_writelane_b32 v40, s42, 8
-; GFX9-NEXT:    s_mov_b32 s36, 0
 ; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_writelane_b32 v40, s43, 9
 ; GFX9-NEXT:    v_mov_b32_e32 v45, v16
 ; GFX9-NEXT:    v_mov_b32_e32 v44, v15
 ; GFX9-NEXT:    v_mov_b32_e32 v43, v14
 ; GFX9-NEXT:    v_mov_b32_e32 v42, v13
 ; GFX9-NEXT:    v_mov_b32_e32 v41, v12
-; GFX9-NEXT:    s_mov_b32 s37, s36
-; GFX9-NEXT:    s_mov_b32 s38, s36
-; GFX9-NEXT:    s_mov_b32 s39, s36
-; GFX9-NEXT:    s_mov_b32 s40, s36
-; GFX9-NEXT:    s_mov_b32 s41, s36
-; GFX9-NEXT:    s_mov_b32 s42, s36
-; GFX9-NEXT:    s_mov_b32 s43, s36
-; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
+; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1
 ; GFX9-NEXT:    s_addk_i32 s32, 0x800
 ; GFX9-NEXT:    s_getpc_b64 s[4:5]
 ; GFX9-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
 ; GFX9-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
 ; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[36:43], s[4:7] dmask:0x1
+; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[41:45], s[4:11], s[4:7] dmask:0x1
 ; GFX9-NEXT:    s_nop 0
 ; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-NEXT:    v_readlane_b32 s43, v40, 9
-; GFX9-NEXT:    v_readlane_b32 s42, v40, 8
-; GFX9-NEXT:    v_readlane_b32 s41, v40, 7
-; GFX9-NEXT:    v_readlane_b32 s40, v40, 6
-; GFX9-NEXT:    v_readlane_b32 s39, v40, 5
-; GFX9-NEXT:    v_readlane_b32 s38, v40, 4
-; GFX9-NEXT:    v_readlane_b32 s37, v40, 3
-; GFX9-NEXT:    v_readlane_b32 s36, v40, 2
 ; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT:    s_addk_i32 s32, 0xf800
-; GFX9-NEXT:    v_readlane_b32 s33, v40, 10
+; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
 ; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
@@ -298,66 +250,42 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_mov_b32 exec_lo, s4
-; GFX10-NEXT:    v_writelane_b32 v40, s33, 10
+; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT:    s_mov_b32 s33, s32
 ; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT:    s_addk_i32 s32, 0x400
+; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
 ; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX10-NEXT:    v_mov_b32_e32 v41, v16
 ; GFX10-NEXT:    v_mov_b32_e32 v42, v15
 ; GFX10-NEXT:    v_mov_b32_e32 v43, v14
-; GFX10-NEXT:    v_mov_b32_e32 v44, v13
 ; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX10-NEXT:    v_mov_b32_e32 v44, v13
 ; GFX10-NEXT:    v_mov_b32_e32 v45, v12
-; GFX10-NEXT:    v_writelane_b32 v40, s36, 2
-; GFX10-NEXT:    s_mov_b32 s36, 0
-; GFX10-NEXT:    v_writelane_b32 v40, s37, 3
-; GFX10-NEXT:    s_mov_b32 s37, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s38, 4
-; GFX10-NEXT:    s_mov_b32 s38, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s39, 5
-; GFX10-NEXT:    s_mov_b32 s39, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s40, 6
-; GFX10-NEXT:    s_mov_b32 s40, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s41, 7
-; GFX10-NEXT:    s_mov_b32 s41, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s42, 8
-; GFX10-NEXT:    s_mov_b32 s42, s36
-; GFX10-NEXT:    v_writelane_b32 v40, s43, 9
-; GFX10-NEXT:    s_mov_b32 s43, s36
-; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_getpc_b64 s[4:5]
-; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
-; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT:    s_clause 0x4
 ; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33
 ; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:4
 ; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:8
 ; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:12
 ; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:16
-; GFX10-NEXT:    v_readlane_b32 s43, v40, 9
-; GFX10-NEXT:    v_readlane_b32 s42, v40, 8
-; GFX10-NEXT:    v_readlane_b32 s41, v40, 7
-; GFX10-NEXT:    v_readlane_b32 s40, v40, 6
-; GFX10-NEXT:    v_readlane_b32 s39, v40, 5
-; GFX10-NEXT:    v_readlane_b32 s38, v40, 4
-; GFX10-NEXT:    v_readlane_b32 s37, v40, 3
-; GFX10-NEXT:    v_readlane_b32 s36, v40, 2
 ; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT:    s_addk_i32 s32, 0xfc00
-; GFX10-NEXT:    v_readlane_b32 s33, v40, 10
+; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
 ; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
 ; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
@@ -372,7 +300,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:20 ; 4-byte Folded Spill
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
-; GFX11-NEXT:    v_writelane_b32 v40, s33, 10
+; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
 ; GFX11-NEXT:    s_mov_b32 s33, s32
 ; GFX11-NEXT:    s_clause 0x4
 ; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:16
@@ -380,56 +308,32 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
 ; GFX11-NEXT:    scratch_store_b32 off, v43, s33 offset:8
 ; GFX11-NEXT:    scratch_store_b32 off, v44, s33 offset:4
 ; GFX11-NEXT:    scratch_store_b32 off, v45, s33
+; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; GFX11-NEXT:    s_add_i32 s32, s32, 32
+; GFX11-NEXT:    s_getpc_b64 s[0:1]
+; GFX11-NEXT:    s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
 ; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    v_dual_mov_b32 v41, v16 :: v_dual_mov_b32 v42, v15
 ; GFX11-NEXT:    v_dual_mov_b32 v43, v14 :: v_dual_mov_b32 v44, v13
 ; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX11-NEXT:    v_mov_b32_e32 v45, v12
-; GFX11-NEXT:    v_writelane_b32 v40, s36, 2
-; GFX11-NEXT:    s_mov_b32 s36, 0
-; GFX11-NEXT:    v_writelane_b32 v40, s37, 3
-; GFX11-NEXT:    s_mov_b32 s37, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s38, 4
-; GFX11-NEXT:    s_mov_b32 s38, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s39, 5
-; GFX11-NEXT:    s_mov_b32 s39, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s40, 6
-; GFX11-NEXT:    s_mov_b32 s40, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s41, 7
-; GFX11-NEXT:    s_mov_b32 s41, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s42, 8
-; GFX11-NEXT:    s_mov_b32 s42, s36
-; GFX11-NEXT:    v_writelane_b32 v40, s43, 9
-; GFX11-NEXT:    s_mov_b32 s43, s36
-; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    s_getpc_b64 s[0:1]
-; GFX11-NEXT:    s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
-; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[36:43], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], [v45, v44, v43, v42, v41], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; GFX11-NEXT:    s_clause 0x4
 ; GFX11-NEXT:    scratch_load_b32 v45, off, s33
 ; GFX11-NEXT:    scratch_load_b32 v44, off, s33 offset:4
 ; GFX11-NEXT:    scratch_load_b32 v43, off, s33 offset:8
 ; GFX11-NEXT:    scratch_load_b32 v42, off, s33 offset:12
 ; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:16
-; GFX11-NEXT:    v_readlane_b32 s43, v40, 9
-; GFX11-NEXT:    v_readlane_b32 s42, v40, 8
-; GFX11-NEXT:    v_readlane_b32 s41, v40, 7
-; GFX11-NEXT:    v_readlane_b32 s40, v40, 6
-; GFX11-NEXT:    v_readlane_b32 s39, v40, 5
-; GFX11-NEXT:    v_readlane_b32 s38, v40, 4
-; GFX11-NEXT:    v_readlane_b32 s37, v40, 3
-; GFX11-NEXT:    v_readlane_b32 s36, v40, 2
 ; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX11-NEXT:    s_addk_i32 s32, 0xffe0
-; GFX11-NEXT:    v_readlane_b32 s33, v40, 10
+; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
 ; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:20 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index dc85462631d4..16c30174657a 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1833,87 +1833,54 @@ main_body:
 define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX9-W64-LABEL: test_loop_vcc:
 ; GFX9-W64:       ; %bb.0: ; %entry
-; GFX9-W64-NEXT:    s_mov_b64 s[8:9], exec
+; GFX9-W64-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v7, v3
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v6, v2
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v4, v0
-; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[8:9]
-; GFX9-W64-NEXT:    s_mov_b32 s0, 0
-; GFX9-W64-NEXT:    s_mov_b32 s1, s0
-; GFX9-W64-NEXT:    s_mov_b32 s2, s0
-; GFX9-W64-NEXT:    s_mov_b32 s3, s0
-; GFX9-W64-NEXT:    s_mov_b32 s4, s0
-; GFX9-W64-NEXT:    s_mov_b32 s5, s0
-; GFX9-W64-NEXT:    s_mov_b32 s6, s0
-; GFX9-W64-NEXT:    s_mov_b32 s7, s0
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[0:1]
 ; GFX9-W64-NEXT:    image_store v[4:7], v0, s[0:7] dmask:0xf unorm
 ; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v8, 0
-; GFX9-W64-NEXT:    s_mov_b32 s10, 0x40e00000
+; GFX9-W64-NEXT:    s_mov_b32 s4, 0x40e00000
 ; GFX9-W64-NEXT:    s_branch .LBB31_2
 ; GFX9-W64-NEXT:  .LBB31_1: ; %body
 ; GFX9-W64-NEXT:    ; in Loop: Header=BB31_2 Depth=1
-; GFX9-W64-NEXT:    s_mov_b32 s1, s0
-; GFX9-W64-NEXT:    s_mov_b32 s2, s0
-; GFX9-W64-NEXT:    s_mov_b32 s3, s0
-; GFX9-W64-NEXT:    s_mov_b32 s4, s0
-; GFX9-W64-NEXT:    s_mov_b32 s5, s0
-; GFX9-W64-NEXT:    s_mov_b32 s6, s0
-; GFX9-W64-NEXT:    s_mov_b32 s7, s0
 ; GFX9-W64-NEXT:    image_sample v[4:7], v0, s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:    v_add_f32_e32 v8, 2.0, v8
-; GFX9-W64-NEXT:    s_mov_b64 s[2:3], 0
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB31_4
 ; GFX9-W64-NEXT:  .LBB31_2: ; %loop
 ; GFX9-W64-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, v4
-; GFX9-W64-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v8
+; GFX9-W64-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v8
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v2, v6
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v3, v7
 ; GFX9-W64-NEXT:    s_cbranch_vccz .LBB31_1
 ; GFX9-W64-NEXT:  ; %bb.3:
-; GFX9-W64-NEXT:    s_mov_b64 s[2:3], -1
 ; GFX9-W64-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-W64-NEXT:    ; implicit-def: $vgpr8
 ; GFX9-W64-NEXT:  .LBB31_4: ; %break
-; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[8:9]
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[0:1]
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-W64-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-W32-LABEL: test_loop_vcc:
 ; GFX10-W32:       ; %bb.0: ; %entry
-; GFX10-W32-NEXT:    s_mov_b32 s8, exec_lo
+; GFX10-W32-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v8, 0
-; GFX10-W32-NEXT:    s_mov_b32 s0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s8
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s0
 ; GFX10-W32-NEXT:    image_store v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-W32-NEXT:    s_branch .LBB31_2
 ; GFX10-W32-NEXT:    .p2align 6
 ; GFX10-W32-NEXT:  .LBB31_1: ; %body
 ; GFX10-W32-NEXT:    ; in Loop: Header=BB31_2 Depth=1
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
-; GFX10-W32-NEXT:    v_add_f32_e32 v8, 2.0, v8
 ; GFX10-W32-NEXT:    image_sample v[0:3], v4, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
-; GFX10-W32-NEXT:    s_mov_b32 s1, 0
+; GFX10-W32-NEXT:    v_add_f32_e32 v8, 2.0, v8
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB31_4
 ; GFX10-W32-NEXT:  .LBB31_2: ; %loop
 ; GFX10-W32-NEXT:    ; =>This Inner Loop Header: Depth=1
@@ -1925,11 +1892,10 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX10-W32-NEXT:    s_cbranch_vccz .LBB31_1
 ; GFX10-W32-NEXT:  ; %bb.3:
-; GFX10-W32-NEXT:    s_mov_b32 s1, -1
 ; GFX10-W32-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX10-W32-NEXT:    ; implicit-def: $vgpr8
 ; GFX10-W32-NEXT:  .LBB31_4: ; %break
-; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s8
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s0
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v1, v5
@@ -1999,14 +1965,6 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
 ; GFX9-W64-NEXT:    v_lshl_add_u32 v0, v2, 2, v0
 ; GFX9-W64-NEXT:    buffer_load_dword v0, v0, s[8:11], 0 offen
 ; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[0:1]
-; GFX9-W64-NEXT:    s_mov_b32 s0, 0
-; GFX9-W64-NEXT:    s_mov_b32 s1, s0
-; GFX9-W64-NEXT:    s_mov_b32 s2, s0
-; GFX9-W64-NEXT:    s_mov_b32 s3, s0
-; GFX9-W64-NEXT:    s_mov_b32 s4, s0
-; GFX9-W64-NEXT:    s_mov_b32 s5, s0
-; GFX9-W64-NEXT:    s_mov_b32 s6, s0
-; GFX9-W64-NEXT:    s_mov_b32 s7, s0
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
@@ -2035,14 +1993,6 @@ define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-W32-NEXT:    buffer_load_dword v0, v2, s[8:11], 0 offen
 ; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s0
-; GFX10-W32-NEXT:    s_mov_b32 s0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
@@ -2079,18 +2029,10 @@ entry:
 define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
 ; GFX9-W64-LABEL: test_nonvoid_return:
 ; GFX9-W64:       ; %bb.0:
-; GFX9-W64-NEXT:    s_mov_b32 s0, 0
-; GFX9-W64-NEXT:    s_mov_b64 s[8:9], exec
-; GFX9-W64-NEXT:    s_mov_b32 s1, s0
-; GFX9-W64-NEXT:    s_mov_b32 s2, s0
-; GFX9-W64-NEXT:    s_mov_b32 s3, s0
-; GFX9-W64-NEXT:    s_mov_b32 s4, s0
-; GFX9-W64-NEXT:    s_mov_b32 s5, s0
-; GFX9-W64-NEXT:    s_mov_b32 s6, s0
-; GFX9-W64-NEXT:    s_mov_b32 s7, s0
+; GFX9-W64-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
 ; GFX9-W64-NEXT:    image_sample v0, v0, s[0:7], s[0:3] dmask:0x1
-; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[8:9]
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[0:1]
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
@@ -2098,18 +2040,10 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
 ;
 ; GFX10-W32-LABEL: test_nonvoid_return:
 ; GFX10-W32:       ; %bb.0:
-; GFX10-W32-NEXT:    s_mov_b32 s0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s8, exec_lo
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
+; GFX10-W32-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-W32-NEXT:    image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s8
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s0
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
@@ -2128,20 +2062,11 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
 define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
 ; GFX9-W64-LABEL: test_nonvoid_return_unreachable:
 ; GFX9-W64:       ; %bb.0: ; %entry
-; GFX9-W64-NEXT:    s_mov_b32 s4, 0
-; GFX9-W64-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9-W64-NEXT:    s_mov_b32 s5, s4
-; GFX9-W64-NEXT:    s_mov_b32 s6, s4
-; GFX9-W64-NEXT:    s_mov_b32 s7, s4
-; GFX9-W64-NEXT:    s_mov_b32 s8, s4
-; GFX9-W64-NEXT:    s_mov_b32 s9, s4
-; GFX9-W64-NEXT:    s_mov_b32 s10, s4
-; GFX9-W64-NEXT:    s_mov_b32 s11, s4
 ; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
-; GFX9-W64-NEXT:    image_sample v0, v0, s[4:11], s[0:3] dmask:0x1
-; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[2:3]
+; GFX9-W64-NEXT:    image_sample v0, v0, s[0:7], s[0:3] dmask:0x1
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, exec
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf
+; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:    s_cmp_lt_i32 s0, 1
 ; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB34_2
 ; GFX9-W64-NEXT:  ; %bb.1: ; %else
@@ -2155,20 +2080,11 @@ define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) noun
 ;
 ; GFX10-W32-LABEL: test_nonvoid_return_unreachable:
 ; GFX10-W32:       ; %bb.0: ; %entry
-; GFX10-W32-NEXT:    s_mov_b32 s4, 0
-; GFX10-W32-NEXT:    s_mov_b32 s1, exec_lo
-; GFX10-W32-NEXT:    s_mov_b32 s5, s4
-; GFX10-W32-NEXT:    s_mov_b32 s6, s4
-; GFX10-W32-NEXT:    s_mov_b32 s7, s4
-; GFX10-W32-NEXT:    s_mov_b32 s8, s4
-; GFX10-W32-NEXT:    s_mov_b32 s9, s4
-; GFX10-W32-NEXT:    s_mov_b32 s10, s4
-; GFX10-W32-NEXT:    s_mov_b32 s11, s4
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
-; GFX10-W32-NEXT:    image_sample v0, v0, s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
+; GFX10-W32-NEXT:    image_sample v0, v0, s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, exec_lo
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX10-W32-NEXT:    s_cmp_lt_i32 s0, 1
 ; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB34_2
 ; GFX10-W32-NEXT:  ; %bb.1: ; %else
@@ -2215,33 +2131,17 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
 ; GFX9-W64-NEXT:    s_cmp_lt_i32 s0, 1
 ; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB35_2
 ; GFX9-W64-NEXT:  ; %bb.1: ; %else
-; GFX9-W64-NEXT:    s_mov_b32 s4, 0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v1, 1
-; GFX9-W64-NEXT:    s_mov_b32 s5, s4
-; GFX9-W64-NEXT:    s_mov_b32 s6, s4
-; GFX9-W64-NEXT:    s_mov_b32 s7, s4
-; GFX9-W64-NEXT:    s_mov_b32 s8, s4
-; GFX9-W64-NEXT:    s_mov_b32 s9, s4
-; GFX9-W64-NEXT:    s_mov_b32 s10, s4
-; GFX9-W64-NEXT:    s_mov_b32 s11, s4
-; GFX9-W64-NEXT:    image_sample v[0:3], v[0:1], s[4:11], s[0:3] dmask:0xf
+; GFX9-W64-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB35_3
 ; GFX9-W64-NEXT:    s_branch .LBB35_4
 ; GFX9-W64-NEXT:  .LBB35_2:
 ; GFX9-W64-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX9-W64-NEXT:  .LBB35_3: ; %if
-; GFX9-W64-NEXT:    s_mov_b32 s4, 0
-; GFX9-W64-NEXT:    s_mov_b32 s5, s4
-; GFX9-W64-NEXT:    s_mov_b32 s6, s4
-; GFX9-W64-NEXT:    s_mov_b32 s7, s4
-; GFX9-W64-NEXT:    s_mov_b32 s8, s4
-; GFX9-W64-NEXT:    s_mov_b32 s9, s4
-; GFX9-W64-NEXT:    s_mov_b32 s10, s4
-; GFX9-W64-NEXT:    s_mov_b32 s11, s4
 ; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[4:11], s[0:3] dmask:0xf
+; GFX9-W64-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf
 ; GFX9-W64-NEXT:  .LBB35_4: ; %end
 ; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[2:3]
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v5, 1.0
@@ -2252,21 +2152,13 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
 ; GFX10-W32-LABEL: test_scc:
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v4, v0
-; GFX10-W32-NEXT:    s_mov_b32 s8, exec_lo
+; GFX10-W32-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
 ; GFX10-W32-NEXT:    s_cmp_lt_i32 s0, 1
 ; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB35_2
 ; GFX10-W32-NEXT:  ; %bb.1: ; %else
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v1, 1
-; GFX10-W32-NEXT:    s_mov_b32 s0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
 ; GFX10-W32-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_2D
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB35_3
 ; GFX10-W32-NEXT:    s_branch .LBB35_4
@@ -2275,17 +2167,9 @@ define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
 ; GFX10-W32-NEXT:  .LBB35_3: ; %if
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s0, 0
-; GFX10-W32-NEXT:    s_mov_b32 s1, s0
-; GFX10-W32-NEXT:    s_mov_b32 s2, s0
-; GFX10-W32-NEXT:    s_mov_b32 s3, s0
-; GFX10-W32-NEXT:    s_mov_b32 s4, s0
-; GFX10-W32-NEXT:    s_mov_b32 s5, s0
-; GFX10-W32-NEXT:    s_mov_b32 s6, s0
-; GFX10-W32-NEXT:    s_mov_b32 s7, s0
 ; GFX10-W32-NEXT:    image_sample v[0:3], v0, s[0:7], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX10-W32-NEXT:  .LBB35_4: ; %end
-; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s8
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v5, 1.0
 ; GFX10-W32-NEXT:    buffer_store_dword v5, v4, s[0:3], 0 idxen
 ; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
-- 
Gitee


From c8e7a87b1ed6b00dc2c3543a80c892d5948f8849 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Tue, 8 Nov 2022 01:36:43 +0000
Subject: [PATCH 16/41] [CMake] Fix -Wstrict-prototypes

Fixes warnings (or errors, if someone injects -Werror in their build system,
which happens in fact with some folks vendoring LLVM too) with Clang 16:
```
+/var/tmp/portage.notmp/portage/sys-devel/llvm-15.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: warning: a function declaration without a prototype
is deprecated in all versions of C [-Wstrict-prototypes]
-/var/tmp/portage.notmp/portage/sys-devel/llvm-14.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: error: a function declaration without a prototype is
deprecated in all versions of C [-Werror,-Wstrict-prototypes]
 int main() {return 0;}
         ^
          void
```

Differential Revision: https://reviews.llvm.org/D137503

(cherry picked from commit 32a2af44e1e882f13d1cc2817f0a8d4d8b375d4d)
---
 .../cmake/Modules/CompilerRTDarwinUtils.cmake |  2 +-
 compiler-rt/cmake/config-ix.cmake             |  2 +-
 compiler-rt/lib/builtins/CMakeLists.txt       |  2 +-
 libcxx/cmake/config-ix.cmake                  |  2 +-
 libcxxabi/cmake/config-ix.cmake               |  2 +-
 libunwind/cmake/config-ix.cmake               |  2 +-
 lldb/tools/debugserver/source/CMakeLists.txt  |  2 +-
 llvm/cmake/config-ix.cmake                    |  2 +-
 llvm/cmake/modules/FindFFI.cmake              |  2 +-
 llvm/cmake/modules/FindTerminfo.cmake         |  2 +-
 llvm/cmake/modules/FindZ3.cmake               |  3 ++-
 llvm/cmake/modules/HandleLLVMOptions.cmake    |  2 +-
 openmp/runtime/cmake/config-ix.cmake          |  2 +-
 polly/lib/External/CMakeLists.txt             | 24 +++++++++----------
 14 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
index 2c9983c6a1ae..640c7e7124c9 100644
--- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -116,7 +116,7 @@ function(darwin_test_archs os valid_archs)
   if(NOT TEST_COMPILE_ONLY)
     message(STATUS "Finding valid architectures for ${os}...")
     set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c)
-    file(WRITE ${SIMPLE_C} "#include <stdio.h>\nint main() { printf(__FILE__); return 0; }\n")
+    file(WRITE ${SIMPLE_C} "#include <stdio.h>\nint main(void) { printf(__FILE__); return 0; }\n")
 
     set(os_linker_flags)
     foreach(flag ${DARWIN_${os}_LINK_FLAGS})
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index cd45176cf2ba..9077e8f9ffe0 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -209,7 +209,7 @@ set(COMPILER_RT_SUPPORTED_ARCH)
 # runtime libraries supported by our current compilers cross-compiling
 # abilities.
 set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc)
-file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main() { printf(\"hello, world\"); }\n")
+file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main(void) { printf(\"hello, world\"); }\n")
 
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index ec668e294d6d..df02682ae00f 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -745,7 +745,7 @@ else ()
                            SOURCE "#if !(__ARM_FP & 0x8)
                                    #error No double-precision support!
                                    #endif
-                                   int main() { return 0; }")
+                                   int main(void) { return 0; }")
           if(NOT COMPILER_RT_HAS_${arch}_VFP_DP)
             list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES})
           endif()
diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index 209e6214a471..e65d32072852 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -94,7 +94,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
-int main() { return 0; }
+int main(void) { return 0; }
 " C_SUPPORTS_COMMENT_LIB_PRAGMA)
   cmake_pop_check_state()
 endif()
diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake
index 079cabf25da5..fa16a108e98a 100644
--- a/libcxxabi/cmake/config-ix.cmake
+++ b/libcxxabi/cmake/config-ix.cmake
@@ -77,7 +77,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
-int main() { return 0; }
+int main(void) { return 0; }
 " C_SUPPORTS_COMMENT_LIB_PRAGMA)
   cmake_pop_check_state()
 endif()
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index c9b65b3cefc0..1b027cf37213 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -85,7 +85,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
-int main() { return 0; }
+int main(void) { return 0; }
 " C_SUPPORTS_COMMENT_LIB_PRAGMA)
   cmake_pop_check_state()
 endif()
diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt
index f636e387bf1f..c6e7e8cf49e8 100644
--- a/lldb/tools/debugserver/source/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/CMakeLists.txt
@@ -95,7 +95,7 @@ check_c_source_compiles(
     #else
     #error Not building for ARM64
     #endif
-    int main() { return 0; }
+    int main(void) { return 0; }
     "
     BUILDING_FOR_ARM64_OSX
 )
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 83512760d8dd..7e657fd1532d 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -71,7 +71,7 @@ if(APPLE)
   CHECK_C_SOURCE_COMPILES("
      static const char *__crashreporter_info__ = 0;
      asm(\".desc ___crashreporter_info__, 0x10\");
-     int main() { return 0; }"
+     int main(void) { return 0; }"
     HAVE_CRASHREPORTER_INFO)
 endif()
 
diff --git a/llvm/cmake/modules/FindFFI.cmake b/llvm/cmake/modules/FindFFI.cmake
index b0d859af8959..a493a89d6301 100644
--- a/llvm/cmake/modules/FindFFI.cmake
+++ b/llvm/cmake/modules/FindFFI.cmake
@@ -45,7 +45,7 @@ if(FFI_LIBRARIES)
     struct ffi_cif;
     typedef struct ffi_cif ffi_cif;
     void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue);
-    int main() { ffi_call(0, 0, 0, 0); }"
+    int main(void) { ffi_call(0, 0, 0, 0); }"
     HAVE_FFI_CALL)
   cmake_pop_check_state()
 endif()
diff --git a/llvm/cmake/modules/FindTerminfo.cmake b/llvm/cmake/modules/FindTerminfo.cmake
index 65edb80fa69a..eef1f95853eb 100644
--- a/llvm/cmake/modules/FindTerminfo.cmake
+++ b/llvm/cmake/modules/FindTerminfo.cmake
@@ -20,7 +20,7 @@ if(Terminfo_LIBRARIES)
   list(APPEND CMAKE_REQUIRED_LIBRARIES ${Terminfo_LIBRARIES})
   check_c_source_compiles("
     int setupterm(char *term, int filedes, int *errret);
-    int main() { return setupterm(0, 0, 0); }"
+    int main(void) { return setupterm(0, 0, 0); }"
     Terminfo_LINKABLE)
   cmake_pop_check_state()
 endif()
diff --git a/llvm/cmake/modules/FindZ3.cmake b/llvm/cmake/modules/FindZ3.cmake
index 118b1eac3b32..6fb56d74184d 100644
--- a/llvm/cmake/modules/FindZ3.cmake
+++ b/llvm/cmake/modules/FindZ3.cmake
@@ -18,8 +18,9 @@ function(check_z3_version z3_include z3_lib)
   # The program that will be executed to print Z3's version.
   file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp
        "#include <assert.h>
+        #include <stdio.h> 
         #include <z3.h>
-        int main() {
+        int main(void) {
           unsigned int major, minor, build, rev;
           Z3_get_version(&major, &minor, &build, &rev);
           printf(\"%u.%u.%u\", major, minor, build);
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 56d05f5b5fce..0fca934be9cf 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -779,7 +779,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
   # line is also a // comment.
   set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment")
-  CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}"
+  CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main(void) {return 0;}"
                           C_WCOMMENT_ALLOWS_LINE_WRAP)
   set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
   if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
index 775c58f98484..bd7585545b72 100644
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -27,7 +27,7 @@ function(libomp_check_version_symbols retval)
     void func2() { printf(\"World\"); }
     __asm__(\".symver func1, func@VER1\");
     __asm__(\".symver func2, func@VER2\");
-    int main() {
+    int main(void) {
       func1();
       func2();
       return 0;
diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
index 2f912e7daeb2..c0a5b32e283f 100644
--- a/polly/lib/External/CMakeLists.txt
+++ b/polly/lib/External/CMakeLists.txt
@@ -64,7 +64,7 @@ if (POLLY_BUNDLED_ISL)
     check_c_source_compiles("
     ${_includes}
     ${_type} typeVar;
-    int main() {
+    int main(void) {
     return 0;
     }
     " ${_variable})
@@ -73,7 +73,7 @@ if (POLLY_BUNDLED_ISL)
 
   check_c_source_compiles("
   int func(void) __attribute__((__warn_unused_result__));
-  int main() { return 0; }
+  int main(void) { return 0; }
   " HAS_ATTRIBUTE_WARN_UNUSED_RESULT)
   set(GCC_WARN_UNUSED_RESULT)
   if (HAS_ATTRIBUTE_WARN_UNUSED_RESULT)
@@ -82,22 +82,22 @@ if (POLLY_BUNDLED_ISL)
 
   check_c_source_compiles("
   __attribute__ ((unused)) static void foo(void);
-  int main() { return 0; }
+  int main(void) { return 0; }
   " HAVE___ATTRIBUTE__)
 
 
   check_c_source_compiles_numeric("
   #include <strings.h>
-  int main() { (void)ffs(0); return 0; }
+  int main(void) { (void)ffs(0); return 0; }
   " HAVE_DECL_FFS)
 
   check_c_source_compiles_numeric("
-  int main() { (void)__builtin_ffs(0); return 0; }
+  int main(void) { (void)__builtin_ffs(0); return 0; }
   " HAVE_DECL___BUILTIN_FFS)
 
   check_c_source_compiles_numeric("
   #include <intrin.h>
-  int main() { (void)_BitScanForward(NULL, 0); return 0; }
+  int main(void) { (void)_BitScanForward(NULL, 0); return 0; }
   " HAVE_DECL__BITSCANFORWARD)
 
   if (NOT HAVE_DECL_FFS AND
@@ -109,12 +109,12 @@ if (POLLY_BUNDLED_ISL)
 
   check_c_source_compiles_numeric("
   #include <strings.h>
-  int main() { (void)strcasecmp(\"\", \"\"); return 0; }
+  int main(void) { (void)strcasecmp(\"\", \"\"); return 0; }
   " HAVE_DECL_STRCASECMP)
 
   check_c_source_compiles_numeric("
   #include <string.h>
-  int main() { (void)_stricmp(\"\", \"\"); return 0; }
+  int main(void) { (void)_stricmp(\"\", \"\"); return 0; }
   " HAVE_DECL__STRICMP)
 
   if (NOT HAVE_DECL_STRCASECMP AND NOT HAVE_DECL__STRICMP)
@@ -124,12 +124,12 @@ if (POLLY_BUNDLED_ISL)
 
   check_c_source_compiles_numeric("
   #include <strings.h>
-  int main() { (void)strncasecmp(\"\", \"\", 0); return 0; }
+  int main(void) { (void)strncasecmp(\"\", \"\", 0); return 0; }
   " HAVE_DECL_STRNCASECMP)
 
   check_c_source_compiles_numeric("
   #include <string.h>
-  int main() { (void)_strnicmp(\"\", \"\", 0); return 0; }
+  int main(void) { (void)_strnicmp(\"\", \"\", 0); return 0; }
   " HAVE_DECL__STRNICMP)
 
   if (NOT HAVE_DECL_STRNCASECMP AND NOT HAVE_DECL__STRNICMP)
@@ -139,12 +139,12 @@ if (POLLY_BUNDLED_ISL)
 
   check_c_source_compiles_numeric("
   #include <stdio.h>
-  int main() { snprintf((void*)0, 0, \" \"); return 0; }
+  int main(void) { snprintf((void*)0, 0, \" \"); return 0; }
   " HAVE_DECL_SNPRINTF)
 
   check_c_source_compiles_numeric("
   #include <stdio.h>
-  int main() { _snprintf((void*)0, 0, \" \"); return 0; }
+  int main(void) { _snprintf((void*)0, 0, \" \"); return 0; }
   " HAVE_DECL__SNPRINTF)
 
   if (NOT HAVE_DECL_SNPRINTF AND NOT HAVE_DECL__SNPRINTF)
-- 
Gitee


From 931b6d51d84e2a5cbbdc925d546819d4d3b7c63e Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Wed, 2 Nov 2022 07:56:43 -0400
Subject: [PATCH 17/41] Reenable POSIX builtin library functions in gnu2x mode

gnu17 and earlier modes automatically expose several POSIX C APIs, and
this was accidentally disabled for gnu2x in
7d644e1215b376ec5e915df9ea2eeb56e2d94626.

This restores the behavior for gnu2x mode (without changing the
behavior in C standards modes instead of GNU modes).

Fixes #56607
---
 clang/docs/ReleaseNotes.rst    |  4 ++++
 clang/lib/Sema/SemaLookup.cpp  |  8 +++-----
 clang/test/Sema/gnu-builtins.c | 13 +++++++++++++
 3 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/Sema/gnu-builtins.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a1ee8f0459ae..13cca2ebbfb8 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -227,6 +227,10 @@ Bug Fixes
   `Issue 57377 <https://github.com/llvm/llvm-project/issues/57377>`_.
 - Fix a crash when a ``btf_type_tag`` attribute is applied to the pointee of
   a function pointer.
+- Clang 14 predeclared some builtin POSIX library functions in ``gnu2x`` mode,
+  and Clang 15 accidentally stopped predeclaring those functions in that
+  language mode. Clang 16 now predeclares those functions again. This fixes
+  `Issue 56607 <https://github.com/llvm/llvm-project/issues/56607>`_.
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 68158ec977cf..5d0d87fd2422 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -939,11 +939,9 @@ bool Sema::LookupBuiltin(LookupResult &R) {
 
       // If this is a builtin on this (or all) targets, create the decl.
       if (unsigned BuiltinID = II->getBuiltinID()) {
-        // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any
-        // predefined library functions like 'malloc'. Instead, we'll just
-        // error.
-        if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL ||
-             getLangOpts().C2x) &&
+        // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
+        // library functions like 'malloc'. Instead, we'll just error.
+        if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) &&
             Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
           return false;
 
diff --git a/clang/test/Sema/gnu-builtins.c b/clang/test/Sema/gnu-builtins.c
new file mode 100644
index 000000000000..c4da8b39363c
--- /dev/null
+++ b/clang/test/Sema/gnu-builtins.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu17 %s
+// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu2x %s
+// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c17 %s
+// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c2x %s
+
+// std-no-diagnostics
+
+// 'index' is a builtin library function, but only in GNU mode. So this should
+// give an error in GNU modes but be okay in non-GNU mode.
+// FIXME: the error is correct, but these notes are pretty awful.
+int index; // gnu-error {{redefinition of 'index' as different kind of symbol}} \
+              gnu-note {{unguarded header; consider using #ifdef guards or #pragma once}} \
+              gnu-note {{previous definition is here}}
-- 
Gitee


From 58ba50a52edeff3cae9cfa3bdd0ee000873ffef9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Mon, 7 Nov 2022 14:46:58 +0100
Subject: [PATCH 18/41] [cmake] Add missing CMakePushCheckState include to
 FindLibEdit.cmake

Add the missing include to fix an error when `cmake_push_check_state()`
is called and incidentally the CMakePushCheckState module is not loaded
by any other check running prior to `FindLibEdit.cmake`:

    CMake Error at /var/no-tmpfs/portage/dev-util/lldb-15.0.4/work/cmake/Modules/FindLibEdit.cmake:24 (cmake_push_check_state):
      Unknown CMake command "cmake_push_check_state".
    Call Stack (most recent call first):
      cmake/modules/LLDBConfig.cmake:52 (find_package)
      cmake/modules/LLDBConfig.cmake:59 (add_optional_dependency)
      CMakeLists.txt:28 (include)

Gentoo Bug: https://bugs.gentoo.org/880065

Differential Revision: https://reviews.llvm.org/D137555

(cherry picked from commit 3676a86a4322e8c2b9c541f057b5d3704146b8f3)
---
 cmake/Modules/FindLibEdit.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/Modules/FindLibEdit.cmake b/cmake/Modules/FindLibEdit.cmake
index 7e62d4d839ae..de8f5a2e7101 100644
--- a/cmake/Modules/FindLibEdit.cmake
+++ b/cmake/Modules/FindLibEdit.cmake
@@ -21,6 +21,7 @@ find_library(LibEdit_LIBRARIES NAMES edit HINTS ${PC_LIBEDIT_LIBRARY_DIRS})
 
 include(CheckIncludeFile)
 if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h")
+  include(CMakePushCheckState)
   cmake_push_check_state()
   list(APPEND CMAKE_REQUIRED_INCLUDES ${LibEdit_INCLUDE_DIRS})
   list(APPEND CMAKE_REQUIRED_LIBRARIES ${LibEdit_LIBRARIES})
-- 
Gitee


From 6750e341b076d10a336c662ed6916500fdb20105 Mon Sep 17 00:00:00 2001
From: "chenglin.bi" <chenglin.bi@linaro.org>
Date: Wed, 9 Nov 2022 05:07:44 +0800
Subject: [PATCH 19/41] [TypePromotion] Replace Zext to Truncate for the case
 src bitwidth is larger

Fix: https://github.com/llvm/llvm-project/issues/58843

Reviewed By: samtebbs

Differential Revision: https://reviews.llvm.org/D137613

(cherry picked from commit 597f44409236bf7fa933a4ce18af23772e28fb43)
---
 llvm/lib/CodeGen/TypePromotion.cpp            |  8 +++++++-
 .../TypePromotion/AArch64/pr58843.ll          | 20 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll

diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 8dc8d381ad16..a63118067139 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -569,7 +569,8 @@ void IRPromoter::TruncateSinks() {
 void IRPromoter::Cleanup() {
   LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
   // Some zexts will now have become redundant, along with their trunc
-  // operands, so remove them
+  // operands, so remove them.
+  // Some zexts need to be replaced with truncate if src bitwidth is larger.
   for (auto *V : Visited) {
     if (!isa<ZExtInst>(V))
       continue;
@@ -584,6 +585,11 @@ void IRPromoter::Cleanup() {
                         << "\n");
       ReplaceAllUsersOfWith(ZExt, Src);
       continue;
+    } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) {
+      IRBuilder<> Builder{ZExt};
+      Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy());
+      ReplaceAllUsersOfWith(ZExt, Trunc);
+      continue;
     }
 
     // We've inserted a trunc for a zext sink, but we already know that the
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll
new file mode 100644
index 000000000000..983a32029c65
--- /dev/null
+++ b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; Check the case don't crash due to zext source type bitwidth
+; larger than dest type bitwidth.
+define i1 @test(i8 %arg) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[EXT1]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %ext1 = zext i8 %arg to i64
+  %trunc = trunc i64 %ext1 to i3
+  %ext2 = zext i3 %trunc to i8
+  %cmp = icmp ne i8 %ext2, 0
+  ret i1 %cmp
+}
-- 
Gitee


From d75ae21044ad893572855cefb0c0898a771b2094 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 19 Oct 2022 19:50:20 +0100
Subject: [PATCH 20/41] Set LLVM_ATOMIC_LIB variable for convenient linking
 against libatomic

* Set LLVM_ATOMIC_LIB to keep track of when we need to link against libatomic.
* Add detection of mold linker which is required for this.
* Use --as-needed when linking against libatomic as a bonus. On some platforms,
  libatomic may be required only sometimes.

Bug: https://bugs.gentoo.org/832675
Thanks-to: Arfrever Frehtes Taifersar Arahesis <Arfrever@Apache.Org>
Tested-by: erhard_f@mailbox.org <erhard_f@mailbox.org>

Differential Revision: https://reviews.llvm.org/D136280

(cherry picked from commit fa981b541365190ae646d2dce575706cd0626cf7)
---
 llvm/cmake/modules/AddLLVM.cmake     |  1 +
 llvm/cmake/modules/CheckAtomic.cmake | 13 +++++++++++++
 llvm/lib/Support/CMakeLists.txt      |  4 +---
 llvm/tools/dsymutil/CMakeLists.txt   |  4 +---
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 057431208322..1f0507f395cf 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -212,6 +212,7 @@ if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32)
   else()
     if("${stdout}" MATCHES "^mold")
       set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "")
+      set(LLVM_LINKER_IS_MOLD YES CACHE INTERNAL "")
       message(STATUS "Linker detection: mold")
     elseif("${stdout}" MATCHES "GNU gold")
       set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "")
diff --git a/llvm/cmake/modules/CheckAtomic.cmake b/llvm/cmake/modules/CheckAtomic.cmake
index 3c5ba72993a3..f11cadf39ff6 100644
--- a/llvm/cmake/modules/CheckAtomic.cmake
+++ b/llvm/cmake/modules/CheckAtomic.cmake
@@ -82,6 +82,19 @@ elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CMAKE_CXX_COMPILER_ID MATCHES "XL")
   endif()
 endif()
 
+# Set variable LLVM_ATOMIC_LIB specifying flags for linking against libatomic.
+if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB)
+  # Use options --push-state, --as-needed and --pop-state if linker is known to support them.
+  # Use single option -Wl of compiler driver to avoid incorrect re-ordering of options by CMake.
+  if(LLVM_LINKER_IS_GNULD OR LLVM_LINKER_IS_GOLD OR LLVM_LINKER_IS_LLD OR LLVM_LINKER_IS_MOLD)
+    set(LLVM_ATOMIC_LIB "-Wl,--push-state,--as-needed,-latomic,--pop-state")
+  else()
+    set(LLVM_ATOMIC_LIB "-latomic")
+  endif()
+else()
+  set(LLVM_ATOMIC_LIB)
+endif()
+
 ## TODO: This define is only used for the legacy atomic operations in
 ## llvm's Atomic.h, which should be replaced.  Other code simply
 ## assumes C++11 <atomic> works.
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 806cbc884cc5..ff23ec74df96 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -59,9 +59,7 @@ elseif( CMAKE_HOST_UNIX )
   if( LLVM_ENABLE_TERMINFO )
     set(imported_libs ${imported_libs} Terminfo::terminfo)
   endif()
-  if( LLVM_ENABLE_THREADS AND (HAVE_LIBATOMIC OR HAVE_CXX_LIBATOMICS64) )
-    set(system_libs ${system_libs} atomic)
-  endif()
+  set(system_libs ${system_libs} ${LLVM_ATOMIC_LIB})
   set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB})
   if( UNIX AND NOT (BEOS OR HAIKU) )
     set(system_libs ${system_libs} m)
diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt
index a255c1c5daf5..38028cd3d80a 100644
--- a/llvm/tools/dsymutil/CMakeLists.txt
+++ b/llvm/tools/dsymutil/CMakeLists.txt
@@ -40,6 +40,4 @@ if(APPLE)
   target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation")
 endif(APPLE)
 
-if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB)
-  target_link_libraries(dsymutil PRIVATE atomic)
-endif()
+target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB})
-- 
Gitee


From 4c3d83810ad7bde70b2665df9a15947695e92adb Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 19 Oct 2022 20:09:34 +0100
Subject: [PATCH 21/41] Link liblldCOFF against libatomic when necessary

Also simplify code for liblldCommon using the new LLVM_ATOMIC_LIB variable.

Depends on D136280.

Bug: https://bugs.gentoo.org/832675
Thanks-to: Arfrever Frehtes Taifersar Arahesis <Arfrever@Apache.Org>
Tested-by: erhard_f@mailbox.org <erhard_f@mailbox.org>

Differential Revision: https://reviews.llvm.org/D136281

(cherry picked from commit f0b451c77f14947e3e7d314f048679fa2f5c6298)
---
 lld/COFF/CMakeLists.txt   | 1 +
 lld/Common/CMakeLists.txt | 9 ++-------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt
index d289bd591034..55aec26854c8 100644
--- a/lld/COFF/CMakeLists.txt
+++ b/lld/COFF/CMakeLists.txt
@@ -44,6 +44,7 @@ add_lld_library(lldCOFF
   LINK_LIBS
   lldCommon
   ${LLVM_PTHREAD_LIB}
+  ${LLVM_ATOMIC_LIB}
 
   DEPENDS
   COFFOptionsTableGen
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 1ae7da1f5f7f..9c23ed395223 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -1,9 +1,3 @@
-set(LLD_SYSTEM_LIBS ${LLVM_PTHREAD_LIB})
-
-if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB)
-  list(APPEND LLD_SYSTEM_LIBS atomic)
-endif()
-
 find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc)
 find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc)
 
@@ -54,7 +48,8 @@ add_lld_library(lldCommon
   Target
 
   LINK_LIBS
-  ${LLD_SYSTEM_LIBS}
+  ${LLVM_PTHREAD_LIB}
+  ${LLVM_ATOMIC_LIB}
 
   DEPENDS
   intrinsics_gen
-- 
Gitee


From 0988addf2680b3717be47fd6f2493f33fe886f90 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Wed, 19 Oct 2022 20:12:10 +0100
Subject: [PATCH 22/41] Link libclangBasic against libatomic when necessary.

This is necessary at least on PPC32.

Depends on D136280.

Bug: https://bugs.gentoo.org/874024
Thanks-to: Arfrever Frehtes Taifersar Arahesis <Arfrever@Apache.Org>
Tested-by: erhard_f@mailbox.org <erhard_f@mailbox.org>

Differential Revision: https://reviews.llvm.org/D136282

(cherry picked from commit 20132d8eaa68a6c53e152718beda1dc0f4c9ff6c)
---
 clang/CMakeLists.txt           | 1 +
 clang/lib/Basic/CMakeLists.txt | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 13d76e7fd935..e3bc4b468fb6 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -117,6 +117,7 @@ if(CLANG_BUILT_STANDALONE)
   include(TableGen)
   include(HandleLLVMOptions)
   include(VersionFromVCS)
+  include(CheckAtomic)
   include(GetErrcMessages)
   include(LLVMDistributionSupport)
 
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index 3e052c0cf995..c38c9fddb424 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -110,3 +110,7 @@ add_clang_library(clangBasic
   omp_gen
   )
 
+target_link_libraries(clangBasic
+  PRIVATE
+  ${LLVM_ATOMIC_LIB}
+)
-- 
Gitee


From 11c3a21f8d1ba21c7744ba9d272c26c2ce3c58a0 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs@gmail.com>
Date: Thu, 13 Oct 2022 08:41:31 +0200
Subject: [PATCH 23/41] [analyzer] Workaround crash on encountering Class
 non-type template parameters

The Clang Static Analyzer will crash on this code:
```lang=C++
struct Box {
  int value;
};
template <Box V> int get() {
  return V.value;
}
template int get<Box{-1}>();
```
https://godbolt.org/z/5Yb1sMMMb

The problem is that we don't account for encountering `TemplateParamObjectDecl`s
within the `DeclRefExpr` handler in the `ExprEngine`.

IMO we should create a new memregion for representing such template
param objects, to model their language semantics.
Such as:
 - it should have global static storage
 - for two identical values, their addresses should be identical as well
http://eel.is/c%2B%2Bdraft/temp.param#8

I was thinking of introducing a `TemplateParamObjectRegion` under `DeclRegion`
for this purpose. It could have `TemplateParamObjectDecl` as a field.

The `TemplateParamObjectDecl::getValue()` returns `APValue`, which might
represent multiple levels of structures, unions and other goodies -
making the transformation from `APValue` to `SVal` a bit complicated.

That being said, for now, I think having `Unknowns` for such cases is
definitely an improvement to crashing, hence I'm proposing this patch.

Reviewed By: xazax.hun

Differential Revision: https://reviews.llvm.org/D135763

(cherry picked from commit b062ee7dc4515b0a42157717105839627d5542bb)
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |  6 ++++
 .../test/Analysis/template-param-objects.cpp  | 33 +++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 clang/test/Analysis/template-param-objects.cpp

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 19149d079822..ab65612bce90 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2839,6 +2839,12 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
     return;
   }
 
+  if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(D)) {
+    // FIXME: We should meaningfully implement this.
+    (void)TPO;
+    return;
+  }
+
   llvm_unreachable("Support for this Decl not implemented.");
 }
 
diff --git a/clang/test/Analysis/template-param-objects.cpp b/clang/test/Analysis/template-param-objects.cpp
new file mode 100644
index 000000000000..dde95fa62cb6
--- /dev/null
+++ b/clang/test/Analysis/template-param-objects.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:    -analyzer-config eagerly-assume=false -std=c++20 -verify %s
+
+template <class T> void clang_analyzer_dump(T);
+void clang_analyzer_eval(bool);
+
+struct Box {
+  int value;
+};
+bool operator ==(Box lhs, Box rhs) {
+  return lhs.value == rhs.value;
+}
+template <Box V> void dumps() {
+  clang_analyzer_dump(V);        // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump(&V);       // expected-warning {{Unknown}}
+  clang_analyzer_dump(V.value);  // expected-warning {{Unknown}} FIXME: It should be '6 S32b'.
+  clang_analyzer_dump(&V.value); // expected-warning {{Unknown}}
+}
+template void dumps<Box{6}>();
+
+// [temp.param].7.3.2:
+// "All such template parameters in the program of the same type with the
+// same value denote the same template parameter object."
+template <Box A1, Box A2, Box B1, Box B2> void stable_addresses() {
+  clang_analyzer_eval(&A1 == &A2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE.
+  clang_analyzer_eval(&B1 == &B2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE.
+  clang_analyzer_eval(&A1 == &B2); // expected-warning {{UNKNOWN}} FIXME: It should be FALSE.
+
+  clang_analyzer_eval(A1 == A2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE.
+  clang_analyzer_eval(B1 == B2); // expected-warning {{UNKNOWN}} FIXME: It should be TRUE.
+  clang_analyzer_eval(A1 == B2); // expected-warning {{UNKNOWN}} FIXME: It should be FALSE.
+}
+template void stable_addresses<Box{1}, Box{1}, Box{2}, Box{2}>();
-- 
Gitee


From 68799e789fc5f2a5ff25beffa4dc8828780c08fb Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 1 Nov 2022 11:37:10 -0700
Subject: [PATCH 24/41] [GlobalOpt] Don't remove inalloca from varargs
 functions

Varargs and inalloca have a weird interaction where varargs are actually
passed via the inalloca alloca. Removing inalloca breaks the varargs
because they're still not passed as separate arguments.

Fixes #58718

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D137182

(cherry picked from commit 8c49b01a1ee051ab2c09be4cffc83656ccc0fbe6)
---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         |  2 +-
 .../Transforms/GlobalOpt/inalloca-varargs.ll  | 38 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6df0409256bb..6fc7b29c5b78 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2003,7 +2003,7 @@ OptimizeFunctions(Module &M,
     // FIXME: We should also hoist alloca affected by this to the entry
     // block if possible.
     if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
-        !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+        !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
       RemoveAttribute(&F, Attribute::InAlloca);
       Changed = true;
     }
diff --git a/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
new file mode 100644
index 000000000000..188210782edd
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/inalloca-varargs.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=globalopt -S < %s | FileCheck %s
+
+define i32 @main(ptr %a) {
+; CHECK-LABEL: define {{[^@]+}}@main
+; CHECK-SAME: (ptr [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ ptr, i32 }>, align 4
+; CHECK-NEXT:    store ptr [[A]], ptr [[ARGMEM]], align 8
+; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds <{ ptr, i32 }>, ptr [[ARGMEM]], i32 0, i32 1
+; CHECK-NEXT:    store i32 5, ptr [[G0]], align 4
+; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @i(ptr inalloca(ptr) [[ARGMEM]])
+; CHECK-NEXT:    ret i32 [[CALL3]]
+;
+  %argmem = alloca inalloca <{ ptr, i32 }>, align 4
+  store ptr %a, ptr %argmem, align 8
+  %g0 = getelementptr inbounds <{ ptr, i32 }>, ptr %argmem, i32 0, i32 1
+  store i32 5, ptr %g0, align 4
+  %call3 = call i32 (ptr, ...) @i(ptr inalloca(ptr) %argmem)
+  ret i32 %call3
+}
+
+define internal i32 @i(ptr inalloca(ptr) %a, ...) {
+; CHECK-LABEL: define {{[^@]+}}@i
+; CHECK-SAME: (ptr inalloca(ptr) [[A:%.*]], ...) unnamed_addr {
+; CHECK-NEXT:    [[AP:%.*]] = alloca ptr, align 4
+; CHECK-NEXT:    call void @llvm.va_start(ptr [[AP]])
+; CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[AP]], align 4
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+; CHECK-NEXT:    ret i32 [[L]]
+;
+  %ap = alloca ptr, align 4
+  call void @llvm.va_start(ptr %ap)
+  %argp.cur = load ptr, ptr %ap, align 4
+  %l = load i32, ptr %argp.cur, align 4
+  ret i32 %l
+}
+
+declare void @llvm.va_start(ptr)
-- 
Gitee


From 392963bb1daf7ec8822a0f02929a8ada17eb0a0a Mon Sep 17 00:00:00 2001
From: Jitka Plesnikova <jplesnik@redhat.com>
Date: Wed, 21 Sep 2022 11:42:46 +0200
Subject: [PATCH 25/41] [lldb] Fix 'error: non-const lvalue...' caused by SWIG
 4.1.0

Fix the failure caused by change in SwigValueWraper for C++11 and later
for improved move semantics in SWIG commit.

https://github.com/swig/swig/commit/d1055f4b3d51cb8060893f8036846ac743302dab
(cherry picked from commit f0a25fe0b746f56295d5c02116ba28d2f965c175)
---
 lldb/bindings/python/python-typemaps.swig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
index bf3de66b91bf..d45431c771ca 100644
--- a/lldb/bindings/python/python-typemaps.swig
+++ b/lldb/bindings/python/python-typemaps.swig
@@ -435,7 +435,7 @@ template <> bool SetNumberFromPyObject<double>(double &number, PyObject *obj) {
 
 %typemap(out) lldb::FileSP {
   $result = nullptr;
-  lldb::FileSP &sp = $1;
+  const lldb::FileSP &sp = $1;
   if (sp) {
     PythonFile pyfile = unwrapOrSetPythonException(PythonFile::FromFile(*sp));
     if (!pyfile.IsValid())
-- 
Gitee


From dc8f6ffc3bf297098a1dfd3fbce801afbe9f5238 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 29 Sep 2022 21:48:38 +0200
Subject: [PATCH 26/41] [lldb] Get rid of __STDC_LIMIT_MACROS and
 __STDC_CONSTANT_MACROS

C++11 made the use of these macro obsolete, see https://sourceware.org/bugzilla/show_bug.cgi?id=15366

As a side effect this prevents https://github.com/swig/swig/issues/2193.

Differential Revision: https://reviews.llvm.org/D134877

(cherry picked from commit 81fc5f7909a4ef5a8d4b5da2a10f77f7cb01ba63)
---
 lldb/bindings/CMakeLists.txt  | 2 --
 lldb/bindings/interfaces.swig | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/lldb/bindings/CMakeLists.txt b/lldb/bindings/CMakeLists.txt
index c8aa0bcf9681..9eed2f1e6299 100644
--- a/lldb/bindings/CMakeLists.txt
+++ b/lldb/bindings/CMakeLists.txt
@@ -26,8 +26,6 @@ set(SWIG_COMMON_FLAGS
   -features autodoc
   -I${LLDB_SOURCE_DIR}/include
   -I${CMAKE_CURRENT_SOURCE_DIR}
-  -D__STDC_LIMIT_MACROS
-  -D__STDC_CONSTANT_MACROS
   ${DARWIN_EXTRAS}
 )
 
diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig
index c9a6d0f06056..021c7683d170 100644
--- a/lldb/bindings/interfaces.swig
+++ b/lldb/bindings/interfaces.swig
@@ -1,8 +1,5 @@
 /* Various liblldb typedefs that SWIG needs to know about.  */
 #define __extension__ /* Undefine GCC keyword to make Swig happy when processing glibc's stdint.h. */
-/* The ISO C99 standard specifies that in C++ implementations limit macros such
-   as INT32_MAX should only be defined if __STDC_LIMIT_MACROS is. */
-#define __STDC_LIMIT_MACROS
 %include "stdint.i"
 
 %include "lldb/lldb-defines.h"
-- 
Gitee


From a399896637584c86acd61afca5bbfe8ed76a7c7d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 13 Nov 2022 22:05:37 +0000
Subject: [PATCH 27/41] [VectorUtils] Skip interleave members with diff type
 and alloca sizes.

Currently, codegen doesn't support cases where the type size doesn't
match the alloc size. Skip them for now.

Fixes #58722.

(cherry picked from commit 758699c39984296f20a4dac44c6892065601c4cd)
---
 llvm/lib/Analysis/VectorUtils.cpp             |   7 +-
 ...interleave-allocsize-not-equal-typesize.ll | 141 ++++++++++++++++++
 2 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index c4795a80ead2..bc20f33f174c 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1110,6 +1110,12 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
         continue;
       Type *ElementTy = getLoadStoreType(&I);
 
+      // Currently, codegen doesn't support cases where the type size doesn't
+      // match the alloc size. Skip them for now.
+      uint64_t Size = DL.getTypeAllocSize(ElementTy);
+      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
+        continue;
+
       // We don't check wrapping here because we don't know yet if Ptr will be
       // part of a full group or a group with gaps. Checking wrapping for all
       // pointers (even those that end up in groups with no gaps) will be overly
@@ -1121,7 +1127,6 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
                                     /*Assume=*/true, /*ShouldCheckWrap=*/false);
 
       const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-      uint64_t Size = DL.getTypeAllocSize(ElementTy);
       AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
                                               getLoadStoreAlignment(&I));
     }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
new file mode 100644
index 000000000000..32ca12ee7e0e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Make sure LV does not crash when analyzing potential interleave groups with
+; accesses where the typesize doesn't match to allocsize.
+define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
+; CHECK-LABEL: @pr58722_load_interleave_group(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[UGLYGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
+; CHECK-NEXT:    [[TMP20:%.*]] = load i24, ptr [[TMP16]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP21:%.*]] = load i24, ptr [[TMP17]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <2 x i24> poison, i24 [[TMP20]], i32 0
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x i24> [[TMP22]], i24 [[TMP21]], i32 1
+; CHECK-NEXT:    [[TMP24:%.*]] = load i24, ptr [[TMP18]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP25:%.*]] = load i24, ptr [[TMP19]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x i24> poison, i24 [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <2 x i24> [[TMP26]], i24 [[TMP25]], i32 1
+; CHECK-NEXT:    [[TMP28:%.*]] = zext <2 x i24> [[TMP23]] to <2 x i32>
+; CHECK-NEXT:    [[TMP29:%.*]] = zext <2 x i24> [[TMP27]] to <2 x i32>
+; CHECK-NEXT:    [[TMP30:%.*]] = add <2 x i32> [[TMP11]], [[TMP28]]
+; CHECK-NEXT:    [[TMP31:%.*]] = add <2 x i32> [[TMP15]], [[TMP29]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0
+; CHECK-NEXT:    store <2 x i32> [[TMP30]], ptr [[TMP34]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 2
+; CHECK-NEXT:    store <2 x i32> [[TMP31]], ptr [[TMP35]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10001, 10000
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[GEP_IV]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_IV]], i64 1
+; CHECK-NEXT:    [[V2:%.*]] = load i24, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[V2_EXT:%.*]] = zext i24 [[V2]] to i32
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[V1]], [[V2_EXT]]
+; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[SUM]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.iv = getelementptr inbounds i64, ptr %src, i64 %iv
+  %v1 = load i32, ptr %gep.iv, align 4
+  %gep = getelementptr inbounds i32, ptr %gep.iv, i64 1
+  %v2 = load i24, ptr %gep, align 4
+  %v2.ext = zext i24 %v2 to i32
+  %sum = add i32 %v1, %v2.ext
+  %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
+  store i32 %sum, ptr %gep.dst
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp eq i64 %iv, 10000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @pr58722_store_interleave_group(ptr %src, ptr %dst) {
+; CHECK-LABEL: @pr58722_store_interleave_group(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]]
+; CHECK-NEXT:    store i32 [[IV]], ptr [[GEP_IV]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1
+; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24
+; CHECK-NEXT:    store i24 [[TRUNC_IV]], ptr [[GEP]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.iv = getelementptr inbounds i64, ptr %src, i32 %iv
+  store i32 %iv, ptr %gep.iv
+  %gep = getelementptr inbounds i64, ptr %gep.iv, i64 1
+  %trunc.iv = trunc i32 %iv to i24
+  store i24 %trunc.iv, ptr %gep
+  %iv.next = add i32 %iv, 2
+  %cmp = icmp eq i32 %iv, 10000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
-- 
Gitee


From 154e88af7ec97d9b9f389e55d45bf07108a9a097 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 15 Nov 2022 22:28:29 -0800
Subject: [PATCH 28/41] Bump version to 15.0.5

---
 libcxx/include/__config                                     | 2 +-
 llvm/CMakeLists.txt                                         | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni                    | 2 +-
 llvm/utils/lit/lit/__init__.py                              | 2 +-
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel          | 6 +++---
 .../clang/include/clang/Config/config.h                     | 2 +-
 utils/bazel/llvm-project-overlay/lld/BUILD.bazel            | 2 +-
 .../llvm/include/llvm/Config/llvm-config.h                  | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 810189c94a94..d5ac34eba37a 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -36,7 +36,7 @@
 
 #ifdef __cplusplus
 
-#  define _LIBCPP_VERSION 15004
+#  define _LIBCPP_VERSION 15005
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 3d6f5e6f9d3d..b101a1a187c9 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 4)
+  set(LLVM_VERSION_PATCH 5)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 3b890e00bec3..391132f1a316 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 15
 llvm_version_minor = 0
-llvm_version_patch = 4
+llvm_version_patch = 5
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 04f6e94535e4..e3c8693c0421 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (15, 0, 4)
+__versioninfo__ = (15, 0, 5)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 96b462717be9..8f21799d4888 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -358,11 +358,11 @@ genrule(
     name = "basic_version_gen",
     outs = ["include/clang/Basic/Version.inc"],
     cmd = (
-        "echo '#define CLANG_VERSION 15.0.4' >> $@\n" +
+        "echo '#define CLANG_VERSION 15.0.5' >> $@\n" +
         "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" +
         "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" +
-        "echo '#define CLANG_VERSION_PATCHLEVEL 4' >> $@\n" +
-        "echo '#define CLANG_VERSION_STRING \"15.0.4\"' >> $@\n"
+        "echo '#define CLANG_VERSION_PATCHLEVEL 5' >> $@\n" +
+        "echo '#define CLANG_VERSION_STRING \"15.0.5\"' >> $@\n"
     ),
 )
 
diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
index 5d157492eae3..ec15fccc089b 100644
--- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
@@ -93,7 +93,7 @@
 /* CLANG_HAVE_RLIMITS defined conditionally below */
 
 /* The LLVM product name and version */
-#define BACKEND_PACKAGE_STRING "LLVM 15.0.4"
+#define BACKEND_PACKAGE_STRING "LLVM 15.0.5"
 
 /* Linker version detected at compile time. */
 /* #undef HOST_LINK_VERSION */
diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
index e3a8c98ffa22..b4e7f5ccbdc4 100644
--- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
@@ -13,7 +13,7 @@ package(
 genrule(
     name = "config_version_gen",
     outs = ["include/lld/Common/Version.inc"],
-    cmd = "echo '#define LLD_VERSION_STRING \"15.0.4\"' > $@",
+    cmd = "echo '#define LLD_VERSION_STRING \"15.0.5\"' > $@",
 )
 
 genrule(
diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
index 7a86202e8e0d..16ffcbf0d2f6 100644
--- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
+++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
@@ -80,10 +80,10 @@
 #define LLVM_VERSION_MINOR 0
 
 /* Patch version of the LLVM API */
-#define LLVM_VERSION_PATCH 4
+#define LLVM_VERSION_PATCH 5
 
 /* LLVM version string */
-#define LLVM_VERSION_STRING "15.0.4"
+#define LLVM_VERSION_STRING "15.0.5"
 
 /* Whether LLVM records statistics for use with GetStatistics(),
  * PrintStatistics() or PrintStatisticsJSON()
-- 
Gitee


From 25a36ca5c791611588a455e7e7e8a3593e90f9a3 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 21 Nov 2022 10:38:24 -0800
Subject: [PATCH 29/41] Bump version to 15.0.6

---
 libcxx/include/__config                                     | 2 +-
 llvm/CMakeLists.txt                                         | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni                    | 2 +-
 llvm/utils/lit/lit/__init__.py                              | 2 +-
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel          | 6 +++---
 .../clang/include/clang/Config/config.h                     | 2 +-
 utils/bazel/llvm-project-overlay/lld/BUILD.bazel            | 2 +-
 .../llvm/include/llvm/Config/llvm-config.h                  | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index d5ac34eba37a..5f62b974170f 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -36,7 +36,7 @@
 
 #ifdef __cplusplus
 
-#  define _LIBCPP_VERSION 15005
+#  define _LIBCPP_VERSION 15006
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index b101a1a187c9..28e28cfe7b6a 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 5)
+  set(LLVM_VERSION_PATCH 6)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 391132f1a316..00e1dc6f1411 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 15
 llvm_version_minor = 0
-llvm_version_patch = 5
+llvm_version_patch = 6
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index e3c8693c0421..b34d17b9dc47 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (15, 0, 5)
+__versioninfo__ = (15, 0, 6)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 8f21799d4888..5ee87d516519 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -358,11 +358,11 @@ genrule(
     name = "basic_version_gen",
     outs = ["include/clang/Basic/Version.inc"],
     cmd = (
-        "echo '#define CLANG_VERSION 15.0.5' >> $@\n" +
+        "echo '#define CLANG_VERSION 15.0.6' >> $@\n" +
         "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" +
         "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" +
-        "echo '#define CLANG_VERSION_PATCHLEVEL 5' >> $@\n" +
-        "echo '#define CLANG_VERSION_STRING \"15.0.5\"' >> $@\n"
+        "echo '#define CLANG_VERSION_PATCHLEVEL 6' >> $@\n" +
+        "echo '#define CLANG_VERSION_STRING \"15.0.6\"' >> $@\n"
     ),
 )
 
diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
index ec15fccc089b..ff4feb2b4af9 100644
--- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
@@ -93,7 +93,7 @@
 /* CLANG_HAVE_RLIMITS defined conditionally below */
 
 /* The LLVM product name and version */
-#define BACKEND_PACKAGE_STRING "LLVM 15.0.5"
+#define BACKEND_PACKAGE_STRING "LLVM 15.0.6"
 
 /* Linker version detected at compile time. */
 /* #undef HOST_LINK_VERSION */
diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
index b4e7f5ccbdc4..6ba06c794413 100644
--- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
@@ -13,7 +13,7 @@ package(
 genrule(
     name = "config_version_gen",
     outs = ["include/lld/Common/Version.inc"],
-    cmd = "echo '#define LLD_VERSION_STRING \"15.0.5\"' > $@",
+    cmd = "echo '#define LLD_VERSION_STRING \"15.0.6\"' > $@",
 )
 
 genrule(
diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
index 16ffcbf0d2f6..a44e9f60c0e1 100644
--- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
+++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
@@ -80,10 +80,10 @@
 #define LLVM_VERSION_MINOR 0
 
 /* Patch version of the LLVM API */
-#define LLVM_VERSION_PATCH 5
+#define LLVM_VERSION_PATCH 6
 
 /* LLVM version string */
-#define LLVM_VERSION_STRING "15.0.5"
+#define LLVM_VERSION_STRING "15.0.6"
 
 /* Whether LLVM records statistics for use with GetStatistics(),
  * PrintStatistics() or PrintStatisticsJSON()
-- 
Gitee


From e6e61e9b2ef7c0fa3fe2cd7c612e00ecf57a9dd8 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 17 Nov 2022 16:07:15 -0800
Subject: [PATCH 30/41] Revert "Reapply: Add an error message to the default
 SIGPIPE handler"

This patch is spamming compiles with unhelpful and confusing messages.
E.g. the Linux kernel uses "grep -q" in several places. It's meant to
quit with a return code of zero when the first match is found. This can
cause a SIGPIPE signal, but that's expected, and there's no way to turn
this error message off to avoid spurious error messages.

UNIX03 apparently doesn't require printing an error message on SIGPIPE,
but specifically when there's an error on the stdout stream in a normal
program flow, e.g. when SIGPIPE trap is disabled.

A separate patch is planned to address the specific case we care most
about (involving llvm-nm).

This reverts commit b89bcefa6202e310eb3167dd1c37f1807377ec8d.

Link: https://github.com/llvm/llvm-project/issues/59037
Link: https://github.com/ClangBuiltLinux/linux/issues/1651

Differential Revision: https://reviews.llvm.org/D138244

(cherry picked from commit 4787efa38066adb51e2c049499d25b3610c0877b)
---
 llvm/lib/Support/Unix/Signals.inc          |  4 ----
 llvm/test/Support/unix03-sigpipe-exit.test | 26 ----------------------
 2 files changed, 30 deletions(-)
 delete mode 100644 llvm/test/Support/unix03-sigpipe-exit.test

diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index bf145bffe8bf..23ac012b9e00 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -432,10 +432,6 @@ void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
 }
 
 void llvm::sys::DefaultOneShotPipeSignalHandler() {
-  // UNIX03 conformance requires a non-zero exit code and an error message
-  // to stderr when writing to a closed stdout fails.
-  errs() << "error: write on a pipe with no reader\n";
-
   // Send a special return code that drivers can check for, from sysexits.h.
   exit(EX_IOERR);
 }
diff --git a/llvm/test/Support/unix03-sigpipe-exit.test b/llvm/test/Support/unix03-sigpipe-exit.test
deleted file mode 100644
index 01680841db00..000000000000
--- a/llvm/test/Support/unix03-sigpipe-exit.test
+++ /dev/null
@@ -1,26 +0,0 @@
-## Test that when writing to a closed stdout, LLVM tools finish with a non-zero
-## exit code and an error message on stderr. The test uses llvm-cxxfilt, but
-## it's a logic from the default SIGPIPE handler, so it applies to all the tools.
-## This is required for UNIX03 conformance.
-
-# UNSUPPORTED: system-windows
-
-# RUN: not %python %s llvm-cxxfilt 2>&1 | FileCheck %s
-# CHECK: error: write on a pipe with no reader
-
-import subprocess
-import sys
-
-with subprocess.Popen([sys.argv[1]], stdout=subprocess.PIPE, stdin=subprocess.PIPE) as process:
-  process.stdout.close()
-
-  # llvm-cxxfilt with no extra arguments runs interactively and writes input
-  # to output. Writing continuously to stdin should trigger SIGPIPE when the
-  # subprocess attempts to write out bytes to a closed stdout.
-  try:
-    while True:
-      process.stdin.write("foo\n".encode("utf-8"))
-  except BrokenPipeError:
-    # Clear stdin, pipe is broken and closing it on cleanup will raise an exception.
-    process.stdin = None
-sys.exit(process.returncode)
-- 
Gitee


From abcd0341d8465d55d7293dfc7142fef5055047fa Mon Sep 17 00:00:00 2001
From: Brett Werling <bwerl.dev@gmail.com>
Date: Wed, 16 Nov 2022 08:16:11 -0800
Subject: [PATCH 31/41] [ELF] Handle GCC collect2 -plugin-opt= on Windows

Follows up on commit cd5d5ce235081005173566c99c592550021de058 by
additionally ignoring relative paths ending in "lto-wrapper.exe" as
can be the case for GCC cross-compiled for Windows.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D138065

(cherry picked from commit cf4f35b78871aecc21e663067c57e60595bd7197)
---
 lld/ELF/Driver.cpp               | 9 ++++++---
 lld/test/ELF/lto-plugin-ignore.s | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 296fb4220012..58d863776430 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1330,12 +1330,15 @@ static void readConfigs(opt::InputArgList &args) {
     parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling());
 
   // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or
-  // relative path. Just ignore. If not ended with "lto-wrapper", consider it an
+  // relative path. Just ignore. If not ended with "lto-wrapper" (or
+  // "lto-wrapper.exe" for GCC cross-compiled for Windows), consider it an
   // unsupported LLVMgold.so option and error.
-  for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq))
-    if (!StringRef(arg->getValue()).endswith("lto-wrapper"))
+  for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) {
+    StringRef v(arg->getValue());
+    if (!v.endswith("lto-wrapper") && !v.endswith("lto-wrapper.exe"))
       error(arg->getSpelling() + ": unknown plugin option '" + arg->getValue() +
             "'");
+  }
 
   config->passPlugins = args::getStrings(args, OPT_load_pass_plugins);
 
diff --git a/lld/test/ELF/lto-plugin-ignore.s b/lld/test/ELF/lto-plugin-ignore.s
index 2935bad149de..dd39139b6243 100644
--- a/lld/test/ELF/lto-plugin-ignore.s
+++ b/lld/test/ELF/lto-plugin-ignore.s
@@ -8,7 +8,9 @@
 # RUN: ld.lld %t.o -o /dev/null \
 # RUN:   -plugin path/to/liblto_plugin.so \
 # RUN:   -plugin-opt=/path/to/lto-wrapper \
+# RUN:   -plugin-opt=/path/to/lto-wrapper.exe \
 # RUN:   -plugin-opt=relative/path/to/lto-wrapper \
+# RUN:   -plugin-opt=relative/path/to/lto-wrapper.exe \
 # RUN:   -plugin-opt=-fresolution=zed \
 # RUN:   -plugin-opt=-pass-through=-lgcc \
 # RUN:   -plugin-opt=-pass-through=-lgcc_eh \
-- 
Gitee


From 088f33605d8a61ff519c580a71b1dd57d16a03f8 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777@gmail.com>
Date: Thu, 17 Nov 2022 23:06:21 +0800
Subject: [PATCH 32/41] [CodeGen][ARM] Fix ARMABIInfo::EmitVAAarg crash with
 empty record type variadic arg

Fix ARMABIInfo::EmitVAAarg crash with empty record type variadic arg

Open issue: https://github.com/llvm/llvm-project/issues/58794

Reviewed By: rjmccall

Differential Revision: https://reviews.llvm.org/D138137

(cherry picked from commit 80f444646c62ccc8b2399d60ac91e62e6e576da6)
---
 clang/lib/CodeGen/TargetInfo.cpp |  8 ++++----
 clang/test/CodeGen/arm-vaarg.c   | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/arm-vaarg.c

diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 36e10e4df4c1..44743fa0206f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7047,10 +7047,10 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
 
   // Empty records are ignored for parameter passing purposes.
   if (isEmptyRecord(getContext(), Ty, true)) {
-    Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
-                           getVAListElementType(CGF), SlotSize);
-    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
-    return Addr;
+    VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
+    auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+    Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
+    return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
   }
 
   CharUnits TySize = getContext().getTypeSizeInChars(Ty);
diff --git a/clang/test/CodeGen/arm-vaarg.c b/clang/test/CodeGen/arm-vaarg.c
new file mode 100644
index 000000000000..4dab397a20de
--- /dev/null
+++ b/clang/test/CodeGen/arm-vaarg.c
@@ -0,0 +1,23 @@
+// RUN: %clang -Xclang -no-opaque-pointers -mfloat-abi=soft -target arm-linux-gnu -emit-llvm -S -o - %s | FileCheck %s
+
+struct Empty {};
+
+struct Empty emptyvar;
+
+void take_args(int a, ...) {
+// CHECK: [[ALLOCA_VA_LIST:%[a-zA-Z0-9._]+]] = alloca %struct.__va_list, align 4
+// CHECK: call void @llvm.va_start
+// CHECK-NEXT: [[AP_ADDR:%[a-zA-Z0-9._]+]] = bitcast %struct.__va_list* [[ALLOCA_VA_LIST]] to i8**
+// CHECK-NEXT: [[LOAD_AP:%[a-zA-Z0-9._]+]] = load i8*, i8** [[AP_ADDR]], align 4
+// CHECK-NEXT: [[EMPTY_PTR:%[a-zA-Z0-9._]+]] = bitcast i8* [[LOAD_AP]] to %struct.Empty*
+
+  // It's conceivable that EMPTY_PTR may not actually be a valid pointer
+  // (e.g. it's at the very bottom of the stack and the next page is
+  // invalid). This doesn't matter provided it's never loaded (there's no
+  // well-defined way to tell), but it becomes a problem if we do try to use it.
+// CHECK-NOT: load %struct.Empty, %struct.Empty* [[EMPTY_PTR]]
+  __builtin_va_list l;
+  __builtin_va_start(l, a);
+  emptyvar = __builtin_va_arg(l, struct Empty);
+  __builtin_va_end(l);
+}
-- 
Gitee


From a8af9f679231a55b8a0f5707d8727679a98f4b06 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 6 Jan 2023 13:09:17 -0800
Subject: [PATCH 33/41] Bump version to 15.0.7

---
 libcxx/include/__config                                     | 2 +-
 llvm/CMakeLists.txt                                         | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni                    | 2 +-
 llvm/utils/lit/lit/__init__.py                              | 2 +-
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel          | 6 +++---
 .../clang/include/clang/Config/config.h                     | 2 +-
 utils/bazel/llvm-project-overlay/lld/BUILD.bazel            | 2 +-
 .../llvm/include/llvm/Config/llvm-config.h                  | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 5f62b974170f..c97cbae96fba 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -36,7 +36,7 @@
 
 #ifdef __cplusplus
 
-#  define _LIBCPP_VERSION 15006
+#  define _LIBCPP_VERSION 15007
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 28e28cfe7b6a..db207e3328be 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -22,7 +22,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 6)
+  set(LLVM_VERSION_PATCH 7)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 00e1dc6f1411..d485a133f921 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 15
 llvm_version_minor = 0
-llvm_version_patch = 6
+llvm_version_patch = 7
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index b34d17b9dc47..928d6db8d3cd 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (15, 0, 6)
+__versioninfo__ = (15, 0, 7)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 5ee87d516519..59932954c949 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -358,11 +358,11 @@ genrule(
     name = "basic_version_gen",
     outs = ["include/clang/Basic/Version.inc"],
     cmd = (
-        "echo '#define CLANG_VERSION 15.0.6' >> $@\n" +
+        "echo '#define CLANG_VERSION 15.0.7' >> $@\n" +
         "echo '#define CLANG_VERSION_MAJOR 15' >> $@\n" +
         "echo '#define CLANG_VERSION_MINOR 0' >> $@\n" +
-        "echo '#define CLANG_VERSION_PATCHLEVEL 6' >> $@\n" +
-        "echo '#define CLANG_VERSION_STRING \"15.0.6\"' >> $@\n"
+        "echo '#define CLANG_VERSION_PATCHLEVEL 7' >> $@\n" +
+        "echo '#define CLANG_VERSION_STRING \"15.0.7\"' >> $@\n"
     ),
 )
 
diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
index ff4feb2b4af9..5ee35630d35a 100644
--- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
@@ -93,7 +93,7 @@
 /* CLANG_HAVE_RLIMITS defined conditionally below */
 
 /* The LLVM product name and version */
-#define BACKEND_PACKAGE_STRING "LLVM 15.0.6"
+#define BACKEND_PACKAGE_STRING "LLVM 15.0.7"
 
 /* Linker version detected at compile time. */
 /* #undef HOST_LINK_VERSION */
diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
index 6ba06c794413..06b57f39e333 100644
--- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel
@@ -13,7 +13,7 @@ package(
 genrule(
     name = "config_version_gen",
     outs = ["include/lld/Common/Version.inc"],
-    cmd = "echo '#define LLD_VERSION_STRING \"15.0.6\"' > $@",
+    cmd = "echo '#define LLD_VERSION_STRING \"15.0.7\"' > $@",
 )
 
 genrule(
diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
index a44e9f60c0e1..1d5c3a4e879b 100644
--- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
+++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h
@@ -80,10 +80,10 @@
 #define LLVM_VERSION_MINOR 0
 
 /* Patch version of the LLVM API */
-#define LLVM_VERSION_PATCH 6
+#define LLVM_VERSION_PATCH 7
 
 /* LLVM version string */
-#define LLVM_VERSION_STRING "15.0.6"
+#define LLVM_VERSION_STRING "15.0.7"
 
 /* Whether LLVM records statistics for use with GetStatistics(),
  * PrintStatistics() or PrintStatisticsJSON()
-- 
Gitee


From 74d3ba1af5c09b85331c90105c461484762ee3e4 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Tue, 13 Dec 2022 15:06:29 -0800
Subject: [PATCH 34/41] [X86] Don't zero out %eax if both %al and %ah are used

The iterator over super and sub registers doesn't include both 8-bit
registers in its list. So if both registers are used and only one of
them is live on return, then we need to make sure that the other 8-bit
register is also marked as live and not zeroed out.

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D139679

(cherry picked from commit 14d4cddc5506fb0fd3c4ac556b4edd970aa151eb)
---
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     |   8 +-
 .../CodeGen/X86/zero-call-used-regs-i386.ll   | 112 ++++++++++++++++++
 2 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll

diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 85d051cfdbe7..a8d40edd88d3 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1237,7 +1237,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
         if (!MO.isReg())
           continue;
 
-        for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+        MCRegister Reg = MO.getReg();
+
+        // This picks up sibling registers (e.q. %al -> %ah).
+        for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
+          RegsToZero.reset(*Unit);
+
+        for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
           RegsToZero.reset(SReg);
       }
     }
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
new file mode 100644
index 000000000000..33e501ca8503
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386
+;
+; Make sure we don't zero out %eax when both %ah and %al are used.
+;
+; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766
+
+%struct.maple_subtree_state = type { ptr }
+
+@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4
+@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4
+@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1
+
+define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind {
+; I386-LABEL: test1:
+; I386:       # %bb.0:
+; I386-NEXT:    pushl %ebx
+; I386-NEXT:    subl $24, %esp
+; I386-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl (%eax), %eax
+; I386-NEXT:    movzbl (%eax), %ebx
+; I386-NEXT:    calll bar
+; I386-NEXT:    testb %al, %al
+; I386-NEXT:    # implicit-def: $al
+; I386-NEXT:    # kill: killed $al
+; I386-NEXT:    je .LBB0_6
+; I386-NEXT:  # %bb.1:
+; I386-NEXT:    cmpl $0, mas_data_end_type
+; I386-NEXT:    je .LBB0_3
+; I386-NEXT:  # %bb.2:
+; I386-NEXT:    movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax
+; I386-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT:    jmp .LBB0_6
+; I386-NEXT:  .LBB0_3:
+; I386-NEXT:    movb mt_pivots_0, %ah
+; I386-NEXT:    movb %ah, %al
+; I386-NEXT:    decb %al
+; I386-NEXT:    movl mas_data_end___trans_tmp_2, %ecx
+; I386-NEXT:    movsbl %al, %edx
+; I386-NEXT:    cmpl $0, (%ecx,%edx,4)
+; I386-NEXT:    je .LBB0_5
+; I386-NEXT:  # %bb.4:
+; I386-NEXT:    movb %al, %ah
+; I386-NEXT:  .LBB0_5:
+; I386-NEXT:    movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT:  .LBB0_6:
+; I386-NEXT:    movb mt_slots_0, %bh
+; I386-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl %eax, (%esp)
+; I386-NEXT:    calll baz
+; I386-NEXT:    subl $4, %esp
+; I386-NEXT:    cmpb %bh, %bl
+; I386-NEXT:    jae .LBB0_8
+; I386-NEXT:  # %bb.7:
+; I386-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; I386-NEXT:    movl %eax, (%esp)
+; I386-NEXT:    calll gaz
+; I386-NEXT:  .LBB0_8:
+; I386-NEXT:    movb $1, %al
+; I386-NEXT:    addl $24, %esp
+; I386-NEXT:    popl %ebx
+; I386-NEXT:    xorl %ecx, %ecx
+; I386-NEXT:    xorl %edx, %edx
+; I386-NEXT:    retl
+  %2 = alloca %struct.maple_subtree_state, align 4
+  %3 = load ptr, ptr %0, align 4
+  %4 = load i8, ptr %3, align 1
+  %5 = tail call zeroext i1 @bar()
+  br i1 %5, label %6, label %20
+
+6:                                                ; preds = %1
+  %7 = load i32, ptr @mas_data_end_type, align 4
+  %8 = icmp eq i32 %7, 0
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %6
+  %10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1
+  br label %20
+
+11:                                               ; preds = %6
+  %12 = load i8, ptr @mt_pivots_0, align 1
+  %13 = add i8 %12, -1
+  %14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4
+  %15 = sext i8 %13 to i32
+  %16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15
+  %17 = load i32, ptr %16, align 4
+  %18 = icmp eq i32 %17, 0
+  %19 = select i1 %18, i8 %12, i8 %13
+  br label %20
+
+20:                                               ; preds = %11, %9, %1
+  %21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ]
+  %22 = load i8, ptr @mt_slots_0, align 1
+  call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2)
+  %23 = icmp ult i8 %4, %22
+  br i1 %23, label %24, label %25
+
+24:                                               ; preds = %20
+  call void @gaz(i8 noundef signext %21)
+  br label %25
+
+25:                                               ; preds = %20, %24
+  ret i1 true
+}
+
+declare dso_local zeroext i1 @bar(...) local_unnamed_addr
+
+declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr
+
+declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr
-- 
Gitee


From 67fd0d2af4bf9e939ebcccb2b66552a31789af94 Mon Sep 17 00:00:00 2001
From: "chenglin.bi" <chenglin.bi@linaro.org>
Date: Tue, 3 Jan 2023 18:12:15 +0800
Subject: [PATCH 35/41] [TypePromotion] Add truncate in ConvertTruncs when the
 original truncate type is not extend type

If the src type is not extend type, after convert the truncate to and we need to truncate the and also to make sure the all user is legal.

The old fix D137613 doesn't work when the truncate convert to and have the other users. So this time I try to add the truncate after and to avoid all these potential issues.

Fix: #59554

Reviewed By: samparker

Differential Revision: https://reviews.llvm.org/D140869

(cherry picked from commit a0b470c9848c638e86f97eca53063642e07cea67)
---
 llvm/lib/CodeGen/TypePromotion.cpp            |  8 ++---
 .../TypePromotion/AArch64/pr58843.ll          | 20 -----------
 .../TypePromotion/AArch64/trunc-zext-chain.ll | 36 +++++++++++++++++++
 3 files changed, 38 insertions(+), 26 deletions(-)
 delete mode 100644 llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll

diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index a63118067139..36e3c1245f1c 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -570,7 +570,6 @@ void IRPromoter::Cleanup() {
   LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
   // Some zexts will now have become redundant, along with their trunc
   // operands, so remove them.
-  // Some zexts need to be replaced with truncate if src bitwidth is larger.
   for (auto *V : Visited) {
     if (!isa<ZExtInst>(V))
       continue;
@@ -585,11 +584,6 @@ void IRPromoter::Cleanup() {
                         << "\n");
       ReplaceAllUsersOfWith(ZExt, Src);
       continue;
-    } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) {
-      IRBuilder<> Builder{ZExt};
-      Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy());
-      ReplaceAllUsersOfWith(ZExt, Trunc);
-      continue;
     }
 
     // We've inserted a trunc for a zext sink, but we already know that the
@@ -626,6 +620,8 @@ void IRPromoter::ConvertTruncs() {
     ConstantInt *Mask =
         ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
     Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+    if (SrcTy != ExtTy)
+      Masked = Builder.CreateTrunc(Masked, ExtTy);
 
     if (auto *I = dyn_cast<Instruction>(Masked))
       NewInsts.insert(I);
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll b/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll
deleted file mode 100644
index 983a32029c65..000000000000
--- a/llvm/test/Transforms/TypePromotion/AArch64/pr58843.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-
-; Check the case don't crash due to zext source type bitwidth
-; larger than dest type bitwidth.
-define i1 @test(i8 %arg) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[EXT1]], 7
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %ext1 = zext i8 %arg to i64
-  %trunc = trunc i64 %ext1 to i3
-  %ext2 = zext i3 %trunc to i8
-  %cmp = icmp ne i8 %ext2, 0
-  ret i1 %cmp
-}
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
index 0a846ba115ec..bcf8dfdd7cb0 100644
--- a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
+++ b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
@@ -177,3 +177,39 @@ latch:                                             ; preds = %bb14, %bb9
 exit:
   ret i64 %var30
 }
+
+; Check the case don't crash due to zext source type bitwidth
+; larger than dest type bitwidth.
+define i1 @pr58843(i8 %arg) {
+; CHECK-LABEL: @pr58843(
+; CHECK-NEXT:    [[EXT1:%.*]] = zext i8 [[ARG:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[EXT1]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %ext1 = zext i8 %arg to i64
+  %trunc = trunc i64 %ext1 to i3
+  %ext2 = zext i3 %trunc to i8
+  %cmp = icmp ne i8 %ext2, 0
+  ret i1 %cmp
+}
+
+; Check the case don't crash due to xor two op have different
+; types
+define i1 @pr59554(i8 %arg) {
+; CHECK-LABEL: @pr59554(
+; CHECK-NEXT:    [[ARG_EXT:%.*]] = zext i8 [[ARG:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[ARG_EXT]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = xor i32 [[TMP2]], 1
+; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[SWITCH_LOBIT]]
+;
+  %arg.ext = zext i8 %arg to i64
+  %trunc = trunc i64 %arg.ext to i3
+  %switch.tableidx = xor i3 %trunc, 1
+  %switch.maskindex = zext i3 %trunc to i8
+  %switch.lobit = icmp ne i8 %switch.maskindex, 0
+  ret i1 %switch.lobit
+}
-- 
Gitee


From 1095870e8ceddc5371f446f4e7c3473f89a461cd Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Mon, 17 Oct 2022 13:36:19 -0700
Subject: [PATCH 36/41] [wasm-ld] Define a `__heap_end` symbol marking the end
 of allocated memory.

Define a `__heap_end` symbol that marks the end of the memory region
that starts at `__heap_base`. This will allow malloc implementations to
know how much memory they can use at `__heap_base` even if someone has
done a `memory.grow` before they can initialize their state.

Differential Revision: https://reviews.llvm.org/D136110
---
 lld/test/wasm/export-all.s             |  7 +++++--
 lld/test/wasm/mutable-global-exports.s |  7 +++++--
 lld/wasm/Driver.cpp                    |  1 +
 lld/wasm/Symbols.cpp                   |  1 +
 lld/wasm/Symbols.h                     | 11 +++++++----
 lld/wasm/Writer.cpp                    | 16 +++++++++++++---
 6 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/lld/test/wasm/export-all.s b/lld/test/wasm/export-all.s
index 009da9f6a381..5af835ce485e 100644
--- a/lld/test/wasm/export-all.s
+++ b/lld/test/wasm/export-all.s
@@ -40,9 +40,12 @@ foo:
 # CHECK-NEXT:       - Name:            __heap_base
 # CHECK-NEXT:         Kind:            GLOBAL
 # CHECK-NEXT:         Index:           4
-# CHECK-NEXT:       - Name:            __memory_base
+# CHECK-NEXT:       - Name:            __heap_end
 # CHECK-NEXT:         Kind:            GLOBAL
 # CHECK-NEXT:         Index:           5
-# CHECK-NEXT:       - Name:            __table_base
+# CHECK-NEXT:       - Name:            __memory_base
 # CHECK-NEXT:         Kind:            GLOBAL
 # CHECK-NEXT:         Index:           6
+# CHECK-NEXT:       - Name:            __table_base
+# CHECK-NEXT:         Kind:            GLOBAL
+# CHECK-NEXT:         Index:           7
diff --git a/lld/test/wasm/mutable-global-exports.s b/lld/test/wasm/mutable-global-exports.s
index e2e45ff93a4b..98009610ac55 100644
--- a/lld/test/wasm/mutable-global-exports.s
+++ b/lld/test/wasm/mutable-global-exports.s
@@ -79,10 +79,13 @@ _start:
 # CHECK-ALL-NEXT:      - Name:            __heap_base
 # CHECK-ALL-NEXT:        Kind:            GLOBAL
 # CHECK-ALL-NEXT:        Index:           5
-# CHECK-ALL-NEXT:      - Name:            __memory_base
+# CHECK-ALL-NEXT:      - Name:            __heap_end
 # CHECK-ALL-NEXT:        Kind:            GLOBAL
 # CHECK-ALL-NEXT:        Index:           6
-# CHECK-ALL-NEXT:      - Name:            __table_base
+# CHECK-ALL-NEXT:      - Name:            __memory_base
 # CHECK-ALL-NEXT:        Kind:            GLOBAL
 # CHECK-ALL-NEXT:        Index:           7
+# CHECK-ALL-NEXT:      - Name:            __table_base
+# CHECK-ALL-NEXT:        Kind:            GLOBAL
+# CHECK-ALL-NEXT:        Index:           8
 # CHECK-ALL-NEXT:  - Type:            CODE
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 0a0f0c8a05bd..4afbfe24110f 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -681,6 +681,7 @@ static void createOptionalSymbols() {
   if (!config->isPic) {
     WasmSym::globalBase = symtab->addOptionalDataSymbol("__global_base");
     WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base");
+    WasmSym::heapEnd = symtab->addOptionalDataSymbol("__heap_end");
     WasmSym::definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base");
     WasmSym::definedTableBase = symtab->addOptionalDataSymbol("__table_base");
     if (config->is64.value_or(false))
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index e0670cea6425..a79c5bec3b3b 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -83,6 +83,7 @@ DefinedData *WasmSym::dsoHandle;
 DefinedData *WasmSym::dataEnd;
 DefinedData *WasmSym::globalBase;
 DefinedData *WasmSym::heapBase;
+DefinedData *WasmSym::heapEnd;
 DefinedData *WasmSym::initMemoryFlag;
 GlobalSymbol *WasmSym::stackPointer;
 GlobalSymbol *WasmSym::tlsBase;
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index c17b720a90fa..32e75a69c5f8 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -538,11 +538,14 @@ struct WasmSym {
   // Symbol marking the end of the data and bss.
   static DefinedData *dataEnd;
 
-  // __heap_base
-  // Symbol marking the end of the data, bss and explicit stack.  Any linear
-  // memory following this address is not used by the linked code and can
-  // therefore be used as a backing store for brk()/malloc() implementations.
+  // __heap_base/__heap_end
+  // Symbols marking the beginning and end of the "heap". It starts at the end
+  // of the data, bss and explicit stack, and extends to the end of the linear
+  // memory allocated by wasm-ld. This region of memory is not used by the
+  // linked code, so it may be used as a backing store for `sbrk` or `malloc`
+  // implementations.
   static DefinedData *heapBase;
+  static DefinedData *heapEnd;
 
   // __wasm_init_memory_flag
   // Symbol whose contents are nonzero iff memory has already been initialized.
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index f98c95526c9e..f6bbaa02b571 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -340,10 +340,20 @@ void Writer::layoutMemory() {
             Twine(maxMemorySetting));
     memoryPtr = config->initialMemory;
   }
-  out.memorySec->numMemoryPages =
-      alignTo(memoryPtr, WasmPageSize) / WasmPageSize;
+
+  memoryPtr = alignTo(memoryPtr, WasmPageSize);
+
+  out.memorySec->numMemoryPages = memoryPtr / WasmPageSize;
   log("mem: total pages = " + Twine(out.memorySec->numMemoryPages));
 
+  if (WasmSym::heapEnd) {
+    // Set `__heap_end` to follow the end of the statically allocated linear
+    // memory. The fact that this comes last means that a malloc/brk
+    // implementation can grow the heap at runtime.
+    log("mem: heap end    = " + Twine(memoryPtr));
+    WasmSym::heapEnd->setVA(memoryPtr);
+  }
+
   if (config->maxMemory != 0) {
     if (config->maxMemory != alignTo(config->maxMemory, WasmPageSize))
       error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned");
@@ -363,7 +373,7 @@ void Writer::layoutMemory() {
       if (config->isPic)
         max = maxMemorySetting;
       else
-        max = alignTo(memoryPtr, WasmPageSize);
+        max = memoryPtr;
     }
     out.memorySec->maxMemoryPages = max / WasmPageSize;
     log("mem: max pages   = " + Twine(out.memorySec->maxMemoryPages));
-- 
Gitee


From 948cadd6d4247f7a5bd8fd6b1386062653d54c84 Mon Sep 17 00:00:00 2001
From: Josh Stone <jistone@redhat.com>
Date: Tue, 3 Jan 2023 16:04:45 -0800
Subject: [PATCH 37/41] [RegAllocFast] Handle new debug values for spills

These new debug values get inserted after the place where the spill
happens, which means they won't be reached by the reverse traversal of
basic block instructions. This would crash or fail assertions if they
contained any virtual registers to be replaced. We can manually handle
the new debug values right away to resolve this.

Fixes https://github.com/llvm/llvm-project/issues/59172

Reviewed By: StephenTozer

Differential Revision: https://reviews.llvm.org/D139590

(cherry picked from commit 87f57f459e7acbb00a6ca4ee6dec6014c5a97e07)
---
 llvm/lib/CodeGen/RegAllocFast.cpp             |   3 +
 .../PowerPC/regalloc-fast-debug-spill.ll      | 250 ++++++++++++++++++
 2 files changed, 253 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 9e4e26f1392e..cb552f212fbb 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -443,6 +443,9 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
     SpilledOperandsMap[MO->getParent()].push_back(MO);
   for (auto MISpilledOperands : SpilledOperandsMap) {
     MachineInstr &DBG = *MISpilledOperands.first;
+    // We don't have enough support for tracking operands of DBG_VALUE_LISTs.
+    if (DBG.isDebugValueList())
+      continue;
     MachineInstr *NewDV = buildDbgValueForSpill(
         *MBB, Before, *MISpilledOperands.first, FI, MISpilledOperands.second);
     assert(NewDV->getParent() == MBB && "dangling parent pointer");
diff --git a/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll b/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll
new file mode 100644
index 000000000000..cae3cb3bbdf8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/regalloc-fast-debug-spill.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 < %s | FileCheck %s
+
+; This test would previously crash in RegisterScavenging, or with assertions
+; enabled it would fail when RegAllocFast calls clearVirtRegs. This was due to
+; unhandled virt regs in cloned DBG_VALUE_LIST for spills, which are now skipped.
+; https://github.com/llvm/llvm-project/issues/59172
+
+target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: argmemonly nocallback nofree nounwind willreturn
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+define void @read_to_end(i1 %0) personality ptr null {
+; CHECK-LABEL: read_to_end:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -80(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    andi. 3, 3, 1
+; CHECK-NEXT:    mfocrf 3, 128
+; CHECK-NEXT:    rlwinm 3, 3, 1, 0, 0
+; CHECK-NEXT:    stw 3, 60(1)
+; CHECK-NEXT:    ld 3, 0(0)
+; CHECK-NEXT:    std 3, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    std 3, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    #DEBUG_VALUE: spec_extend<u8, alloc::alloc::Global>:iterator <- [DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_constu 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value, DW_OP_LLVM_fragment 64 64] undef, $x3
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    crxor 20, 1, 20
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    addi 1, 1, 80
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    ld 5, 72(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 4, 64(1) # 8-byte Folded Reload
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    bl memcpy
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lwz 4, 60(1)
+; CHECK-NEXT:    # implicit-def: $cr5lt
+; CHECK-NEXT:    mfocrf 3, 4
+; CHECK-NEXT:    rlwimi 3, 4, 12, 20, 20
+; CHECK-NEXT:    mtocrf 4, 3
+; CHECK-NEXT:    bc 12, 20, .LBB0_4
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    addi 1, 1, 80
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+  %2 = load ptr, ptr null, align 8
+  %3 = sub i64 0, 0
+  call void @llvm.dbg.value(metadata !DIArgList(ptr %2, i64 %3), metadata !129, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value, DW_OP_LLVM_fragment, 64, 64)), !dbg !140
+  br i1 %0, label %4, label %5
+
+4:                                                ; preds = %1
+  ret void
+
+5:                                                ; preds = %1
+  tail call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %2, i64 %3, i1 false)
+  br i1 %0, label %7, label %6
+
+6:                                                ; preds = %5
+  br label %7
+
+7:                                                ; preds = %6, %5
+  ret void
+}
+
+attributes #0 = { argmemonly nocallback nofree nounwind willreturn }
+attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!1}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !2, producer: "clang LLVM (rustc version 1.67.0-dev)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "library/std/src/lib.rs/@/std.ff910444-cgu.11", directory: "/home/jistone/rust")
+!3 = !{}
+!4 = !{!5, !12, !18, !23, !28, !34, !43, !49, !52, !58, !64, !68, !77, !81, !86, !90, !98, !102, !106, !111, !117, !124}
+!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
+!6 = distinct !DIGlobalVariable(name: "<alloc::string::String as core::fmt::Write>::{vtable}", scope: null, file: !7, type: !8, isLocal: true, isDefinition: true)
+!7 = !DIFile(filename: "<unknown>", directory: "")
+!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<alloc::string::String as core::fmt::Write>::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !9, templateParams: !3, identifier: "1ec913b2a90798f33a12cdc627a17d3d")
+!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "String", scope: !10, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "b616ccc9e18737e903266aae12eea82")
+!10 = !DINamespace(name: "string", scope: !11)
+!11 = !DINamespace(name: "alloc", scope: null)
+!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression())
+!13 = distinct !DIGlobalVariable(name: "<core::cell::BorrowMutError as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !14, isLocal: true, isDefinition: true)
+!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<core::cell::BorrowMutError as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !15, templateParams: !3, identifier: "7f09904511177108b2e94c43effbe403")
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BorrowMutError", scope: !16, file: !7, align: 8, elements: !3, identifier: "acf9edd4524e0ff9a9398905d3ba31a6")
+!16 = !DINamespace(name: "cell", scope: !17)
+!17 = !DINamespace(name: "core", scope: null)
+!18 = !DIGlobalVariableExpression(var: !19, expr: !DIExpression())
+!19 = distinct !DIGlobalVariable(name: "<core::fmt::Error as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !20, isLocal: true, isDefinition: true)
+!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<core::fmt::Error as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !21, templateParams: !3, identifier: "84cb7e6d80fc4c532d8f45aaa75a7ae3")
+!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Error", scope: !22, file: !7, align: 8, elements: !3, identifier: "abcb9fb1fe4fda8598a8687b517935b")
+!22 = !DINamespace(name: "fmt", scope: !17)
+!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression())
+!24 = distinct !DIGlobalVariable(name: "<core::array::TryFromSliceError as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !25, isLocal: true, isDefinition: true)
+!25 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<core::array::TryFromSliceError as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !26, templateParams: !3, identifier: "bafa31943f8233dbf8d2de6a615f899")
+!26 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TryFromSliceError", scope: !27, file: !7, align: 8, elements: !3, templateParams: !3, identifier: "2dd7cf8d77337f63be7c7f5feb370b37")
+!27 = !DINamespace(name: "array", scope: !17)
+!28 = !DIGlobalVariableExpression(var: !29, expr: !DIExpression())
+!29 = distinct !DIGlobalVariable(name: "<core::num::error::TryFromIntError as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !30, isLocal: true, isDefinition: true)
+!30 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<core::num::error::TryFromIntError as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !31, templateParams: !3, identifier: "3292395ea0f5a7e3e88f36db52eb440c")
+!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TryFromIntError", scope: !32, file: !7, align: 8, elements: !3, templateParams: !3, identifier: "5b131d57d001578fbf4fb83f2028eb12")
+!32 = !DINamespace(name: "error", scope: !33)
+!33 = !DINamespace(name: "num", scope: !17)
+!34 = !DIGlobalVariableExpression(var: !35, expr: !DIExpression())
+!35 = distinct !DIGlobalVariable(name: "OUTPUT_CAPTURE_USED", linkageName: "_ZN3std2io5stdio19OUTPUT_CAPTURE_USED17h6bb564f9f9e20f1bE", scope: !36, file: !39, line: 38, type: !40, isLocal: true, isDefinition: true, align: 8)
+!36 = !DINamespace(name: "stdio", scope: !37)
+!37 = !DINamespace(name: "io", scope: !38)
+!38 = !DINamespace(name: "std", scope: null)
+!39 = !DIFile(filename: "library/std/src/io/stdio.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "6e6a519ce8370e29f07d850a34a413c1")
+!40 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AtomicBool", scope: !41, file: !7, size: 8, align: 8, elements: !3, templateParams: !3, identifier: "c1cddf0305d4e6a98a8ddd4b6fdb5b91")
+!41 = !DINamespace(name: "atomic", scope: !42)
+!42 = !DINamespace(name: "sync", scope: !17)
+!43 = !DIGlobalVariableExpression(var: !44, expr: !DIExpression())
+!44 = distinct !DIGlobalVariable(name: "INSTANCE", linkageName: "_ZN3std2io5stdio5stdin8INSTANCE17h225ddf7c6608f4aaE", scope: !45, file: !39, line: 320, type: !46, isLocal: true, isDefinition: true, align: 64)
+!45 = !DINamespace(name: "stdin", scope: !36)
+!46 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OnceLock<std::sync::mutex::Mutex<std::io::buffered::bufreader::BufReader<std::io::stdio::StdinRaw>>>", scope: !47, file: !7, size: 448, align: 64, elements: !3, templateParams: !3, identifier: "2236c08b0846b8b2f33c235183822718")
+!47 = !DINamespace(name: "once_lock", scope: !48)
+!48 = !DINamespace(name: "sync", scope: !38)
+!49 = !DIGlobalVariableExpression(var: !50, expr: !DIExpression())
+!50 = distinct !DIGlobalVariable(name: "STDOUT", linkageName: "_ZN3std2io5stdio6STDOUT17hd8472b9eb112f94aE", scope: !36, file: !39, line: 554, type: !51, isLocal: true, isDefinition: true, align: 64)
+!51 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OnceLock<std::sys_common::remutex::ReentrantMutex<core::cell::RefCell<std::io::buffered::linewriter::LineWriter<std::io::stdio::StdoutRaw>>>>", scope: !47, file: !7, size: 512, align: 64, elements: !3, templateParams: !3, identifier: "31535135376bcf9dc80438cb0beaa95")
+!52 = !DIGlobalVariableExpression(var: !53, expr: !DIExpression())
+!53 = distinct !DIGlobalVariable(name: "INSTANCE", linkageName: "_ZN3std2io5stdio6stderr8INSTANCE17he81b75fda1609dccE", scope: !54, file: !39, line: 844, type: !55, isLocal: true, isDefinition: true, align: 64)
+!54 = !DINamespace(name: "stderr", scope: !36)
+!55 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ReentrantMutex<core::cell::RefCell<std::io::stdio::StderrRaw>>", scope: !56, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "5c48123cbde8afbfd832b70e8bb014b")
+!56 = !DINamespace(name: "remutex", scope: !57)
+!57 = !DINamespace(name: "sys_common", scope: !38)
+!58 = !DIGlobalVariableExpression(var: !59, expr: !DIExpression())
+!59 = distinct !DIGlobalVariable(name: "<std::io::Write::write_fmt::Adapter<std::io::stdio::StdoutLock> as core::fmt::Write>::{vtable}", scope: null, file: !7, type: !60, isLocal: true, isDefinition: true)
+!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<std::io::Write::write_fmt::Adapter<std::io::stdio::StdoutLock> as core::fmt::Write>::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !61, templateParams: !3, identifier: "43681bec3eba7b0defb75fd847230ef3")
+!61 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Adapter<std::io::stdio::StdoutLock>", scope: !62, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "eda3cd8f60f00feb7019a3d90d2413dd")
+!62 = !DINamespace(name: "write_fmt", scope: !63)
+!63 = !DINamespace(name: "Write", scope: !37)
+!64 = !DIGlobalVariableExpression(var: !65, expr: !DIExpression())
+!65 = distinct !DIGlobalVariable(name: "<std::io::Write::write_fmt::Adapter<std::io::stdio::StderrLock> as core::fmt::Write>::{vtable}", scope: null, file: !7, type: !66, isLocal: true, isDefinition: true)
+!66 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<std::io::Write::write_fmt::Adapter<std::io::stdio::StderrLock> as core::fmt::Write>::{vtable_type}", file: !7, size: 384, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !67, templateParams: !3, identifier: "2235adf22355a080446df25ada963d8f")
+!67 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Adapter<std::io::stdio::StderrLock>", scope: !62, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "50f418463bae1beffe989359452b170a")
+!68 = !DIGlobalVariableExpression(var: !69, expr: !DIExpression())
+!69 = distinct !DIGlobalVariable(name: "__KEY", linkageName: "_ZN3std2io5stdio14OUTPUT_CAPTURE7__getit5__KEY17h82ea5b0c4e81236dE", scope: !70, file: !72, line: 331, type: !73, isLocal: true, isDefinition: true, align: 64)
+!70 = !DINamespace(name: "__getit", scope: !71)
+!71 = !DINamespace(name: "OUTPUT_CAPTURE", scope: !36)
+!72 = !DIFile(filename: "library/std/src/thread/local.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "e3766fd5751a888dc2040f63031e944e")
+!73 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Key<core::cell::Cell<core::option::Option<alloc::sync::Arc<std::sync::mutex::Mutex<alloc::vec::Vec<u8, alloc::alloc::Global>>>>>>", scope: !74, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "4d1514f685cbd010d7b86d2eef080b6c")
+!74 = !DINamespace(name: "fast", scope: !75)
+!75 = !DINamespace(name: "local", scope: !76)
+!76 = !DINamespace(name: "thread", scope: !38)
+!77 = !DIGlobalVariableExpression(var: !78, expr: !DIExpression())
+!78 = distinct !DIGlobalVariable(name: "<i32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !79, isLocal: true, isDefinition: true)
+!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<i32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !80, templateParams: !3, identifier: "1a7c0806435616633a284f48de1194c5")
+!80 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed)
+!81 = !DIGlobalVariableExpression(var: !82, expr: !DIExpression())
+!82 = distinct !DIGlobalVariable(name: "<std::path::PathBuf as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !83, isLocal: true, isDefinition: true)
+!83 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<std::path::PathBuf as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !84, templateParams: !3, identifier: "8e82e7dafb168f2995a6711175975a8")
+!84 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PathBuf", scope: !85, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "85bd755ad2187534379df2cc01ef53a0")
+!85 = !DINamespace(name: "path", scope: !38)
+!86 = !DIGlobalVariableExpression(var: !87, expr: !DIExpression())
+!87 = distinct !DIGlobalVariable(name: "<bool as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !88, isLocal: true, isDefinition: true)
+!88 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<bool as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !89, templateParams: !3, identifier: "a8f7c32dd1df279746df60c6d46ce35e")
+!89 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
+!90 = !DIGlobalVariableExpression(var: !91, expr: !DIExpression())
+!91 = distinct !DIGlobalVariable(name: "STATX_STATE", linkageName: "_ZN3std3sys4unix2fs9try_statx11STATX_STATE17h465ade0d62262837E", scope: !92, file: !96, line: 157, type: !97, isLocal: true, isDefinition: true, align: 8)
+!92 = !DINamespace(name: "try_statx", scope: !93)
+!93 = !DINamespace(name: "fs", scope: !94)
+!94 = !DINamespace(name: "unix", scope: !95)
+!95 = !DINamespace(name: "sys", scope: !38)
+!96 = !DIFile(filename: "library/std/src/sys/unix/fs.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "594559328c68ee77afe955cd571273ee")
+!97 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AtomicU8", scope: !41, file: !7, size: 8, align: 8, elements: !3, templateParams: !3, identifier: "dda3b691bea8e1b5292414dd97926af2")
+!98 = !DIGlobalVariableExpression(var: !99, expr: !DIExpression())
+!99 = distinct !DIGlobalVariable(name: "<&bool as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !100, isLocal: true, isDefinition: true)
+!100 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&bool as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !101, templateParams: !3, identifier: "5e8d2c48c9cc79c318e2bd28b03e141a")
+!101 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&bool", baseType: !89, size: 64, align: 64, dwarfAddressSpace: 0)
+!102 = !DIGlobalVariableExpression(var: !103, expr: !DIExpression())
+!103 = distinct !DIGlobalVariable(name: "<&i32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !104, isLocal: true, isDefinition: true)
+!104 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&i32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !105, templateParams: !3, identifier: "d4029746615b6a868ffbc67515d99878")
+!105 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&i32", baseType: !80, size: 64, align: 64, dwarfAddressSpace: 0)
+!106 = !DIGlobalVariableExpression(var: !107, expr: !DIExpression())
+!107 = distinct !DIGlobalVariable(name: "<&u32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !108, isLocal: true, isDefinition: true)
+!108 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&u32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !109, templateParams: !3, identifier: "178e0e76b9d9178d686381b2d05a7777")
+!109 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&u32", baseType: !110, size: 64, align: 64, dwarfAddressSpace: 0)
+!110 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned)
+!111 = !DIGlobalVariableExpression(var: !112, expr: !DIExpression())
+!112 = distinct !DIGlobalVariable(name: "<&core::option::Option<std::sys::unix::time::SystemTime> as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !113, isLocal: true, isDefinition: true)
+!113 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<&core::option::Option<std::sys::unix::time::SystemTime> as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !114, templateParams: !3, identifier: "7ca8386b4d420d719587fa3255329a7a")
+!114 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&core::option::Option<std::sys::unix::time::SystemTime>", baseType: !115, size: 64, align: 64, dwarfAddressSpace: 0)
+!115 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<std::sys::unix::time::SystemTime>", scope: !116, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "ad8474e495013fa1e3af4a6b53a05f4b")
+!116 = !DINamespace(name: "option", scope: !17)
+!117 = !DIGlobalVariableExpression(var: !118, expr: !DIExpression())
+!118 = distinct !DIGlobalVariable(name: "HAS_CLONE3", linkageName: "_ZN3std3sys4unix7process13process_inner66_$LT$impl$u20$std..sys..unix..process..process_common..Command$GT$7do_fork10HAS_CLONE317h7d23eb353ae1c9a8E", scope: !119, file: !123, line: 148, type: !40, isLocal: true, isDefinition: true, align: 8)
+!119 = !DINamespace(name: "do_fork", scope: !120)
+!120 = !DINamespace(name: "{impl#0}", scope: !121)
+!121 = !DINamespace(name: "process_inner", scope: !122)
+!122 = !DINamespace(name: "process", scope: !94)
+!123 = !DIFile(filename: "library/std/src/sys/unix/process/process_unix.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "91761d638041a5dd66c0a64d968debe6")
+!124 = !DIGlobalVariableExpression(var: !125, expr: !DIExpression())
+!125 = distinct !DIGlobalVariable(name: "<core::num::nonzero::NonZeroI32 as core::fmt::Debug>::{vtable}", scope: null, file: !7, type: !126, isLocal: true, isDefinition: true)
+!126 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<core::num::nonzero::NonZeroI32 as core::fmt::Debug>::{vtable_type}", file: !7, size: 256, align: 64, flags: DIFlagArtificial, elements: !3, vtableHolder: !127, templateParams: !3, identifier: "13903f30d26ee5869ef7a3fc63a2e03d")
+!127 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NonZeroI32", scope: !128, file: !7, size: 32, align: 32, elements: !3, templateParams: !3, identifier: "e292f11a32f1ce5cf3b26864e4a0f5e5")
+!128 = !DINamespace(name: "nonzero", scope: !33)
+!129 = !DILocalVariable(name: "iterator", arg: 2, scope: !130, file: !131, line: 83, type: !137)
+!130 = distinct !DISubprogram(name: "spec_extend<u8, alloc::alloc::Global>", linkageName: "_ZN132_$LT$alloc..vec..Vec$LT$T$C$A$GT$$u20$as$u20$alloc..vec..spec_extend..SpecExtend$LT$$RF$T$C$core..slice..iter..Iter$LT$T$GT$$GT$$GT$11spec_extend17hb56b69f474ec1e6dE", scope: !132, file: !131, line: 83, type: !135, scopeLine: 83, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3)
+!131 = !DIFile(filename: "library/alloc/src/vec/spec_extend.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "0614d5dabe9e343254af1b3fa1ec7315")
+!132 = !DINamespace(name: "{impl#4}", scope: !133)
+!133 = !DINamespace(name: "spec_extend", scope: !134)
+!134 = !DINamespace(name: "vec", scope: !11)
+!135 = distinct !DISubroutineType(types: !136)
+!136 = !{null}
+!137 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Iter<u8>", scope: !138, file: !7, size: 128, align: 64, elements: !3, templateParams: !3, identifier: "c31ab6f02ccece1f1a6e93425acabaa1")
+!138 = !DINamespace(name: "iter", scope: !139)
+!139 = !DINamespace(name: "slice", scope: !17)
+!140 = !DILocation(line: 0, scope: !130, inlinedAt: !141)
+!141 = distinct !DILocation(line: 2392, column: 9, scope: !142, inlinedAt: !146)
+!142 = distinct !DISubprogram(name: "extend_from_slice<u8, alloc::alloc::Global>", linkageName: "_ZN5alloc3vec16Vec$LT$T$C$A$GT$17extend_from_slice17hbc8d29f2694fd768E", scope: !144, file: !143, line: 2391, type: !145, scopeLine: 2391, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3)
+!143 = !DIFile(filename: "library/alloc/src/vec/mod.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "0d69d0c0c11b3e47364cf6be0d07c829")
+!144 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<u8, alloc::alloc::Global>", scope: !134, file: !7, size: 192, align: 64, elements: !3, templateParams: !3, identifier: "f970dea4d30c1daf847db520fef9390d")
+!145 = distinct !DISubroutineType(types: !136)
+!146 = distinct !DILocation(line: 330, column: 9, scope: !147, inlinedAt: !154)
+!147 = distinct !DILexicalBlock(scope: !149, file: !148, line: 329, column: 9)
+!148 = !DIFile(filename: "library/std/src/io/buffered/bufreader.rs", directory: "/home/jistone/rust", checksumkind: CSK_MD5, checksum: "5375e06de487f85ee2f6d21c8a84ce7d")
+!149 = distinct !DISubprogram(name: "read_to_end<std::io::stdio::StdinRaw>", linkageName: "_ZN82_$LT$std..io..buffered..bufreader..BufReader$LT$R$GT$$u20$as$u20$std..io..Read$GT$11read_to_end17h9f09720ee76e6db9E", scope: !150, file: !148, line: 328, type: !153, scopeLine: 328, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3)
+!150 = !DINamespace(name: "{impl#3}", scope: !151)
+!151 = !DINamespace(name: "bufreader", scope: !152)
+!152 = !DINamespace(name: "buffered", scope: !37)
+!153 = distinct !DISubroutineType(types: !3)
+!154 = distinct !DILocation(line: 464, column: 9, scope: !155, inlinedAt: !158)
+!155 = distinct !DISubprogram(name: "read_to_end", linkageName: "_ZN59_$LT$std..io..stdio..StdinLock$u20$as$u20$std..io..Read$GT$11read_to_end17h38999a681cc6c5b5E", scope: !156, file: !39, line: 463, type: !157, scopeLine: 463, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3)
+!156 = !DINamespace(name: "{impl#7}", scope: !36)
+!157 = distinct !DISubroutineType(types: !3)
+!158 = distinct !DILocation(line: 430, column: 9, scope: !159)
+!159 = distinct !DISubprogram(name: "read_to_end", linkageName: "_ZN55_$LT$std..io..stdio..Stdin$u20$as$u20$std..io..Read$GT$11read_to_end17haba70a09681d41d3E", scope: !160, file: !39, line: 429, type: !161, scopeLine: 429, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3)
+!160 = !DINamespace(name: "{impl#5}", scope: !36)
+!161 = !DISubroutineType(types: !3)
-- 
Gitee


From 939f5a33711370697f9ad1de4267cfc7399dfe86 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sun, 8 Jan 2023 00:32:42 +0100
Subject: [PATCH 38/41] libc++: bring back the unsigned in the return type in
 wcstoull_l

got remove here:
https://github.com/llvm/llvm-project/commit/67b0b02ec9f2bbc57bf8f0550828d97f460ac11f#diff-e41832b8aa26da45585a57c5111531f2e1d07e91a67c4f8bf1cd6d566ae45a2bR42

Differential Revision: https://reviews.llvm.org/D141208

(cherry picked from commit fc87452916c0d8759625aad65e9335778ce9cc68)
---
 libcxx/include/__support/musl/xlocale.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__support/musl/xlocale.h b/libcxx/include/__support/musl/xlocale.h
index f564c87885ac..e674a41fa622 100644
--- a/libcxx/include/__support/musl/xlocale.h
+++ b/libcxx/include/__support/musl/xlocale.h
@@ -39,7 +39,7 @@ wcstoll_l(const wchar_t *__nptr, wchar_t **__endptr, int __base, locale_t) {
   return ::wcstoll(__nptr, __endptr, __base);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long
+inline _LIBCPP_HIDE_FROM_ABI unsigned long long
 wcstoull_l(const wchar_t *__nptr, wchar_t **__endptr, int __base, locale_t) {
   return ::wcstoull(__nptr, __endptr, __base);
 }
-- 
Gitee


From 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Thu, 17 Nov 2022 21:34:29 +0100
Subject: [PATCH 39/41] [libc++] Fix memory leaks when throwing inside
 std::vector constructors

Fixes #58392

Reviewed By: ldionne, #libc

Spies: alexfh, hans, joanahalili, dblaikie, libcxx-commits

Differential Revision: https://reviews.llvm.org/D138601
---
 libcxx/include/vector                         | 119 ++++++---
 .../vector.bool/ctor_exceptions.pass.cpp      | 141 +++++++++++
 .../vector/vector.cons/exceptions.pass.cpp    | 229 ++++++++++++++++++
 3 files changed, 453 insertions(+), 36 deletions(-)
 create mode 100644 libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp
 create mode 100644 libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp

diff --git a/libcxx/include/vector b/libcxx/include/vector
index 252a0f051ff5..63759407ce94 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -297,6 +297,7 @@ erase_if(vector<T, Allocator>& c, Predicate pred);    // C++20
 #include <__utility/forward.h>
 #include <__utility/move.h>
 #include <__utility/swap.h>
+#include <__utility/transaction.h>
 #include <climits>
 #include <cstdlib>
 #include <cstring>
@@ -425,18 +426,27 @@ public:
                                     value_type,
                                     typename iterator_traits<_ForwardIterator>::reference>::value>::type* = 0);
 
-    _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
-    ~vector()
-    {
-      __annotate_delete();
-      std::__debug_db_erase_c(this);
+private:
+  class __destroy_vector {
+    public:
+      _LIBCPP_CONSTEXPR __destroy_vector(vector& __vec) : __vec_(__vec) {}
 
-      if (this->__begin_ != nullptr)
-      {
-        __clear();
-        __alloc_traits::deallocate(__alloc(), this->__begin_, capacity());
+      _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI void operator()() {
+          __vec_.__annotate_delete();
+          std::__debug_db_erase_c(std::addressof(__vec_));
+
+          if (__vec_.__begin_ != nullptr) {
+            __vec_.__clear();
+            __alloc_traits::deallocate(__vec_.__alloc(), __vec_.__begin_, __vec_.capacity());
+          }
       }
-    }
+
+    private:
+      vector& __vec_;
+  };
+
+public:
+  _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI ~vector() { __destroy_vector(*this)(); }
 
     _LIBCPP_CONSTEXPR_AFTER_CXX17 vector(const vector& __x);
     _LIBCPP_CONSTEXPR_AFTER_CXX17 vector(const vector& __x, const __type_identity_t<allocator_type>& __a);
@@ -1075,12 +1085,14 @@ template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<_Tp, _Allocator>::vector(size_type __n)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__n);
     }
+    __guard.__complete();
 }
 
 #if _LIBCPP_STD_VER > 11
@@ -1089,12 +1101,14 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a)
     : __end_cap_(nullptr, __a)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__n);
     }
+    __guard.__complete();
 }
 #endif
 
@@ -1102,12 +1116,14 @@ template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__n, __x);
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1120,9 +1136,11 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first,
                             typename iterator_traits<_InputIterator>::reference>::value,
                           _InputIterator>::type __last)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     for (; __first != __last; ++__first)
         emplace_back(*__first);
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1135,9 +1153,11 @@ vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, c
                             typename iterator_traits<_InputIterator>::reference>::value>::type*)
     : __end_cap_(nullptr, __a)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     for (; __first != __last; ++__first)
         emplace_back(*__first);
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1150,13 +1170,15 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first,
                                    typename iterator_traits<_ForwardIterator>::reference>::value,
                                                    _ForwardIterator>::type __last)
 {
-    _VSTD::__debug_db_insert_c(this);
-    size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
+    size_type __n = static_cast<size_type>(std::distance(__first, __last));
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__first, __last, __n);
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1169,13 +1191,15 @@ vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __las
                                    typename iterator_traits<_ForwardIterator>::reference>::value>::type*)
     : __end_cap_(nullptr, __a)
 {
-    _VSTD::__debug_db_insert_c(this);
-    size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
+    size_type __n = static_cast<size_type>(std::distance(__first, __last));
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__first, __last, __n);
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1183,13 +1207,15 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<_Tp, _Allocator>::vector(const vector& __x)
     : __end_cap_(nullptr, __alloc_traits::select_on_container_copy_construction(__x.__alloc()))
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     size_type __n = __x.size();
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__x.__begin_, __x.__end_, __n);
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1197,13 +1223,15 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<_Tp, _Allocator>::vector(const vector& __x, const __type_identity_t<allocator_type>& __a)
     : __end_cap_(nullptr, __a)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     size_type __n = __x.size();
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__x.__begin_, __x.__end_, __n);
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1243,7 +1271,9 @@ vector<_Tp, _Allocator>::vector(vector&& __x, const __type_identity_t<allocator_
     else
     {
         typedef move_iterator<iterator> _Ip;
+        auto __guard = std::__make_transaction(__destroy_vector(*this));
         assign(_Ip(__x.begin()), _Ip(__x.end()));
+        __guard.__complete();
     }
 }
 
@@ -1254,12 +1284,14 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17
 inline _LIBCPP_INLINE_VISIBILITY
 vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     if (__il.size() > 0)
     {
         __vallocate(__il.size());
         __construct_at_end(__il.begin(), __il.end(), __il.size());
     }
+    __guard.__complete();
 }
 
 template <class _Tp, class _Allocator>
@@ -1268,12 +1300,14 @@ inline _LIBCPP_INLINE_VISIBILITY
 vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il, const allocator_type& __a)
     : __end_cap_(nullptr, __a)
 {
-    _VSTD::__debug_db_insert_c(this);
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    std::__debug_db_insert_c(this);
     if (__il.size() > 0)
     {
         __vallocate(__il.size());
         __construct_at_end(__il.begin(), __il.end(), __il.size());
     }
+    __guard.__complete();
 }
 
 #endif // _LIBCPP_CXX03_LANG
@@ -2111,8 +2145,26 @@ public:
 #else
         _NOEXCEPT;
 #endif
-    _LIBCPP_CONSTEXPR_AFTER_CXX17 ~vector();
-    _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n);
+
+private:
+  class __destroy_vector {
+    public:
+      _LIBCPP_CONSTEXPR __destroy_vector(vector& __vec) : __vec_(__vec) {}
+
+      _LIBCPP_CONSTEXPR_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI void operator()() {
+        if (__vec_.__begin_ != nullptr)
+            __storage_traits::deallocate(__vec_.__alloc(), __vec_.__begin_, __vec_.__cap());
+        std::__debug_db_invalidate_all(this);
+      }
+
+    private:
+      vector& __vec_;
+  };
+
+public:
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 ~vector() { __destroy_vector(*this)(); }
+
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n);
 #if _LIBCPP_STD_VER > 11
     _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit vector(size_type __n, const allocator_type& __a);
 #endif
@@ -2647,12 +2699,14 @@ vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __la
       __size_(0),
       __cap_alloc_(0, __default_init_tag())
 {
-    size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    size_type __n = static_cast<size_type>(std::distance(__first, __last));
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__first, __last);
     }
+    __guard.__complete();
 }
 
 template <class _Allocator>
@@ -2664,12 +2718,14 @@ vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __la
       __size_(0),
       __cap_alloc_(0, static_cast<__storage_allocator>(__a))
 {
-    size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
+    auto __guard = std::__make_transaction(__destroy_vector(*this));
+    size_type __n = static_cast<size_type>(std::distance(__first, __last));
     if (__n > 0)
     {
         __vallocate(__n);
         __construct_at_end(__first, __last);
     }
+    __guard.__complete();
 }
 
 #ifndef _LIBCPP_CXX03_LANG
@@ -2706,15 +2762,6 @@ vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const alloca
 
 #endif // _LIBCPP_CXX03_LANG
 
-template <class _Allocator>
-_LIBCPP_CONSTEXPR_AFTER_CXX17
-vector<bool, _Allocator>::~vector()
-{
-    if (__begin_ != nullptr)
-        __storage_traits::deallocate(__alloc(), __begin_, __cap());
-    std::__debug_db_invalidate_all(this);
-}
-
 template <class _Allocator>
 _LIBCPP_CONSTEXPR_AFTER_CXX17
 vector<bool, _Allocator>::vector(const vector& __v)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp
new file mode 100644
index 000000000000..592d733de42d
--- /dev/null
+++ b/libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp
@@ -0,0 +1,141 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: no-exceptions
+
+// (bug report: https://llvm.org/PR58392)
+// Check that vector<bool> constructors don't leak memory when an operation inside the constructor throws an exception
+
+#include <type_traits>
+#include <vector>
+
+#include "count_new.h"
+#include "test_iterators.h"
+
+template <class T>
+struct Allocator {
+  using value_type      = T;
+  using is_always_equal = std::false_type;
+
+  template <class U>
+  Allocator(const Allocator<U>&) {}
+
+  Allocator(bool should_throw = true) {
+    if (should_throw)
+      throw 0;
+  }
+
+  T* allocate(int n) { return std::allocator<T>().allocate(n); }
+  void deallocate(T* ptr, int n) { std::allocator<T>().deallocate(ptr, n); }
+
+  friend bool operator==(const Allocator&, const Allocator&) { return false; }
+};
+
+template <class IterCat>
+struct Iterator {
+  using iterator_category = IterCat;
+  using difference_type   = std::ptrdiff_t;
+  using value_type        = bool;
+  using reference         = bool&;
+  using pointer           = bool*;
+
+  int i_;
+  bool b_ = true;
+  Iterator(int i = 0) : i_(i) {}
+  bool& operator*() {
+    if (i_ == 1)
+      throw 1;
+    return b_;
+  }
+
+  friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ == rhs.i_; }
+
+  friend bool operator!=(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ != rhs.i_; }
+
+  Iterator& operator++() {
+    ++i_;
+    return *this;
+  }
+
+  Iterator operator++(int) {
+    auto tmp = *this;
+    ++i_;
+    return tmp;
+  }
+};
+
+void check_new_delete_called() {
+  assert(globalMemCounter.new_called == globalMemCounter.delete_called);
+  assert(globalMemCounter.new_array_called == globalMemCounter.delete_array_called);
+  assert(globalMemCounter.aligned_new_called == globalMemCounter.aligned_delete_called);
+  assert(globalMemCounter.aligned_new_array_called == globalMemCounter.aligned_delete_array_called);
+}
+
+int main(int, char**) {
+  using AllocVec = std::vector<bool, Allocator<bool> >;
+
+#if TEST_STD_VER >= 14
+  try { // Throw in vector(size_type, const allocator_type&) from allocator
+    Allocator<bool> alloc(false);
+    AllocVec get_alloc(0, alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+#endif  // TEST_STD_VER >= 14
+
+  try { // Throw in vector(InputIterator, InputIterator) from input iterator
+    std::vector<bool> vec((Iterator<std::input_iterator_tag>()), Iterator<std::input_iterator_tag>(2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator) from forward iterator
+    std::vector<bool> vec((Iterator<std::forward_iterator_tag>()), Iterator<std::forward_iterator_tag>(2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator) from allocator
+    int a[] = {1, 2};
+    AllocVec vec(cpp17_input_iterator<int*>(a), cpp17_input_iterator<int*>(a + 2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from input iterator
+    std::allocator<bool> alloc;
+    std::vector<bool> vec(Iterator<std::input_iterator_tag>(), Iterator<std::input_iterator_tag>(2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from forward iterator
+    std::allocator<bool> alloc;
+    std::vector<bool> vec(Iterator<std::forward_iterator_tag>(), Iterator<std::forward_iterator_tag>(2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator
+    bool a[] = {true, false};
+    Allocator<bool> alloc(false);
+    AllocVec vec(cpp17_input_iterator<bool*>(a), cpp17_input_iterator<bool*>(a + 2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator
+    bool a[] = {true, false};
+    Allocator<bool> alloc(false);
+    AllocVec vec(forward_iterator<bool*>(a), forward_iterator<bool*>(a + 2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
new file mode 100644
index 000000000000..26ad7b4fd05a
--- /dev/null
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
@@ -0,0 +1,229 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: no-exceptions
+
+// (bug report: https://llvm.org/PR58392)
+// Check that vector constructors don't leak memory when an operation inside the constructor throws an exception
+
+#include <type_traits>
+#include <vector>
+
+#include "count_new.h"
+#include "test_iterators.h"
+
+template <class T>
+struct Allocator {
+  using value_type      = T;
+  using is_always_equal = std::false_type;
+
+  Allocator(bool should_throw = true) {
+    if (should_throw)
+      throw 0;
+  }
+
+  T* allocate(int n) { return std::allocator<T>().allocate(n); }
+  void deallocate(T* ptr, int n) { std::allocator<T>().deallocate(ptr, n); }
+
+  friend bool operator==(const Allocator&, const Allocator&) { return false; }
+};
+
+struct ThrowingT {
+  int* throw_after_n_ = nullptr;
+  ThrowingT() { throw 0; }
+
+  ThrowingT(int& throw_after_n) : throw_after_n_(&throw_after_n) {
+    if (throw_after_n == 0)
+      throw 0;
+    --throw_after_n;
+  }
+
+  ThrowingT(const ThrowingT&) {
+    if (throw_after_n_ == nullptr || *throw_after_n_ == 0)
+      throw 1;
+    --*throw_after_n_;
+  }
+
+  ThrowingT& operator=(const ThrowingT&) {
+    if (throw_after_n_ == nullptr || *throw_after_n_ == 0)
+      throw 1;
+    --*throw_after_n_;
+    return *this;
+  }
+};
+
+template <class IterCat>
+struct Iterator {
+  using iterator_category = IterCat;
+  using difference_type   = std::ptrdiff_t;
+  using value_type        = int;
+  using reference         = int&;
+  using pointer           = int*;
+
+  int i_;
+  Iterator(int i = 0) : i_(i) {}
+  int& operator*() {
+    if (i_ == 1)
+      throw 1;
+    return i_;
+  }
+
+  friend bool operator==(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ == rhs.i_; }
+
+  friend bool operator!=(const Iterator& lhs, const Iterator& rhs) { return lhs.i_ != rhs.i_; }
+
+  Iterator& operator++() {
+    ++i_;
+    return *this;
+  }
+
+  Iterator operator++(int) {
+    auto tmp = *this;
+    ++i_;
+    return tmp;
+  }
+};
+
+void check_new_delete_called() {
+  assert(globalMemCounter.new_called == globalMemCounter.delete_called);
+  assert(globalMemCounter.new_array_called == globalMemCounter.delete_array_called);
+  assert(globalMemCounter.aligned_new_called == globalMemCounter.aligned_delete_called);
+  assert(globalMemCounter.aligned_new_array_called == globalMemCounter.aligned_delete_array_called);
+}
+
+int main(int, char**) {
+  using AllocVec = std::vector<int, Allocator<int> >;
+  try { // vector()
+    AllocVec vec;
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(size_type) from type
+    std::vector<ThrowingT> get_alloc(1);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+#if TEST_STD_VER >= 14
+  try { // Throw in vector(size_type, value_type) from type
+    int throw_after = 1;
+    ThrowingT v(throw_after);
+    std::vector<ThrowingT> get_alloc(1, v);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(size_type, const allocator_type&) from allocator
+    Allocator<int> alloc(false);
+    AllocVec get_alloc(0, alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(size_type, const allocator_type&) from the type
+    std::vector<ThrowingT> vec(1, std::allocator<ThrowingT>());
+  } catch (int) {
+  }
+  check_new_delete_called();
+#endif  // TEST_STD_VER >= 14
+
+  try { // Throw in vector(InputIterator, InputIterator) from input iterator
+    std::vector<int> vec((Iterator<std::input_iterator_tag>()), Iterator<std::input_iterator_tag>(2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator) from forward iterator
+    std::vector<int> vec((Iterator<std::forward_iterator_tag>()), Iterator<std::forward_iterator_tag>(2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator) from allocator
+    int a[] = {1, 2};
+    AllocVec vec(cpp17_input_iterator<int*>(a), cpp17_input_iterator<int*>(a + 2));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from input iterator
+    std::allocator<int> alloc;
+    std::vector<int> vec(Iterator<std::input_iterator_tag>(), Iterator<std::input_iterator_tag>(2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from forward iterator
+    std::allocator<int> alloc;
+    std::vector<int> vec(Iterator<std::forward_iterator_tag>(), Iterator<std::forward_iterator_tag>(2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator
+    int a[] = {1, 2};
+    Allocator<int> alloc(false);
+    AllocVec vec(cpp17_input_iterator<int*>(a), cpp17_input_iterator<int*>(a + 2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(InputIterator, InputIterator, const allocator_type&) from allocator
+    int a[] = {1, 2};
+    Allocator<int> alloc(false);
+    AllocVec vec(forward_iterator<int*>(a), forward_iterator<int*>(a + 2), alloc);
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(const vector&) from type
+    std::vector<ThrowingT> vec;
+    int throw_after = 0;
+    vec.emplace_back(throw_after);
+    auto vec2 = vec;
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(const vector&, const allocator_type&) from type
+    std::vector<ThrowingT> vec;
+    int throw_after = 1;
+    vec.emplace_back(throw_after);
+    std::vector<ThrowingT> vec2(vec, std::allocator<int>());
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(vector&&, const allocator_type&) from type
+    std::vector<ThrowingT, Allocator<ThrowingT> > vec(Allocator<ThrowingT>(false));
+    int throw_after = 1;
+    vec.emplace_back(throw_after);
+    std::vector<ThrowingT, Allocator<ThrowingT> > vec2(std::move(vec), Allocator<ThrowingT>(false));
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+#if TEST_STD_VER >= 11
+  try { // Throw in vector(initializer_list<value_type>) from type
+    int throw_after = 1;
+    std::vector<ThrowingT> vec({ThrowingT(throw_after)});
+  } catch (int) {
+  }
+  check_new_delete_called();
+
+  try { // Throw in vector(initializer_list<value_type>, const allocator_type&) constructor from type
+    int throw_after = 1;
+    std::vector<ThrowingT> vec({ThrowingT(throw_after)}, std::allocator<ThrowingT>());
+  } catch (int) {
+  }
+  check_new_delete_called();
+#endif // TEST_STD_VER >= 11
+
+  return 0;
+}
-- 
Gitee


From cbaba90c719d4363aead90c2552cb5fdbe39c7c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=BE=9B=E8=BF=8E=E6=9E=97?= <xinyinglin@huawei.com>
Date: Fri, 4 Nov 2022 10:18:37 +0000
Subject: [PATCH 40/41] add build.sh.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 辛迎林 <xinyinglin@huawei.com>
---
 README_OE.md                                  |  21 +++
 build.sh                                      | 164 ++++++++++++++++++
 .../lsan/TestCases/Linux/log-path_test.cpp    |   1 +
 compiler-rt/test/lsan/TestCases/malloc_zero.c |   1 +
 .../test/lsan/TestCases/realloc_too_big.c     |   1 +
 .../archer/tests/races/critical-unrelated.c   |   2 +
 .../tests/races/lock-nested-unrelated.c       |   1 +
 .../tools/archer/tests/races/lock-unrelated.c |   2 +
 .../archer/tests/races/parallel-simple.c      |   2 +
 .../archer/tests/races/task-dependency.c      |   2 +
 .../tests/races/task-taskgroup-unrelated.c    |   2 +
 .../archer/tests/races/task-taskwait-nested.c |   2 +
 openmp/tools/archer/tests/races/task-two.c    |   2 +
 .../archer/tests/task/task_late_fulfill.c     |   1 +
 14 files changed, 204 insertions(+)
 create mode 100644 README_OE.md
 create mode 100755 build.sh

diff --git a/README_OE.md b/README_OE.md
new file mode 100644
index 000000000000..af956a923c04
--- /dev/null
+++ b/README_OE.md
@@ -0,0 +1,21 @@
+  我们提供了build.sh脚本来构建llvm-project。
+  我们已经验证了它可以在openEuler系统成功构建。
+
+  在构建编译器之前，确保已经安装了以下工具：
+  gcc \
+  g++ \
+  libgcc \
+  openssl-devel \
+  cmake \
+  python3 \
+  python3-setuptools \
+  python-wheel \
+  texinfo \
+  binutils \
+  libstdc++-static \
+  libatomic
+
+ 具体的编译选项可以在build.sh中查看，也可以通过“./build.sh -h”命令查看。
+ 例如：
+   ./build.sh -r -b release -X X86
+
diff --git a/build.sh b/build.sh
new file mode 100755
index 000000000000..4fed7989a657
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+# Tools to use for bootstrapping.
+C_COMPILER_PATH=gcc
+CXX_COMPILER_PATH=g++
+
+# Initialize our own variables:
+buildtype=RelWithDebInfo
+backends="ARM;AArch64;X86"
+split_dwarf=on
+use_ccache="0"
+do_install="0"
+clean=0
+unit_test=""
+verbose=""
+dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+build_dir_name="build"
+install_dir_name="install"
+build_prefix="$dir/$build_dir_name"
+install_prefix="$dir/$install_dir_name"
+
+# Use 8 threads for builds and tests by default. Use more threads if possible,
+# but avoid overloading the system by using up to 50% of available cores.
+threads=8
+nproc=$(type -p nproc)
+if [ -x "$nproc" -a -f /proc/loadavg ]; then
+  loadavg=$(awk '{printf "%.0f\n", $1}' < /proc/loadavg)
+  let threads="($($nproc) - $loadavg) / 2"
+  if [ $threads -le 0 ]; then
+    threads=1
+  fi
+fi
+
+# Exit script on first error.
+set -e
+
+usage() {
+  cat <<EOF
+Usage: $0 [options]
+
+Build the compiler under $(get_relative_path $build_prefix), then install under $(get_relative_path $install_prefix) .
+
+Options:
+  -b type  Specify CMake build type (default: $buildtype).
+  -c       Use ccache (default: $use_ccache).
+  -h       Display this help message.
+  -i       Install the build (default: $do_install).
+  -I name  Specify install directory name (default: "$install_dir_name").
+  -j N     Allow N jobs at once (default: $threads).
+  -r       Delete $(get_relative_path $install_prefix) and perform a clean build (default: incremental).
+  -t       Enable unit tests for components that support them (make check-all).
+  -v       Enable verbose build output (default: quiet).
+  -X archs Build only the specified semi-colon-delimited list of backends (default: "$backends").
+EOF
+}
+
+# Process command-line options. Remember the options for passing to the
+# containerized build script.
+while getopts :b:chiI:j:rtvX: optchr; do
+  case "$optchr" in
+    b)
+      buildtype="$OPTARG"
+      case "${buildtype,,}" in
+        release)
+          split_dwarf=off
+          ;;
+        debug|relwithdebinfo)
+          ;;
+        *)
+          echo "$0: invalid build type '$buildtype'"
+          exit 1
+          ;;
+      esac
+      ;;
+    c)
+      use_ccache="1"
+      ;;
+    h)
+      usage
+      exit
+      ;;
+    i)
+      do_install="1"
+      ;;
+    I)
+      install_dir_name="$OPTARG"
+      install_prefix="$dir/$install_dir_name"
+      ;;
+    j)
+      threads="$OPTARG"
+      ;;
+    r)
+      clean=1
+      ;;
+    t)
+      unit_test=check-all
+      ;;
+    v)
+      verbose="VERBOSE=1"
+      ;;
+    X)
+      backends="$OPTARG"
+      ;;
+    :)
+      echo "$0: missing argument for option '-$OPTARG'"
+      exit 1
+      ;;
+    ?)
+      echo "$0: invalid option '-$OPTARG'"
+      exit 1
+      ;;
+  esac
+done
+
+CMAKE_OPTIONS="-DCMAKE_INSTALL_PREFIX=$install_prefix \
+               -DCMAKE_BUILD_TYPE=$buildtype \
+               -DCMAKE_C_COMPILER=$C_COMPILER_PATH \
+               -DCMAKE_CXX_COMPILER=$CXX_COMPILER_PATH \
+               -DLLVM_TARGETS_TO_BUILD=$backends "
+
+# Warning: the -DLLVM_ENABLE_PROJECTS option is specified with cmake
+# to avoid issues with nested quotation marks
+
+if [ $use_ccache == "1" ]; then
+  echo "Build using ccache"
+  CMAKE_OPTIONS="$CMAKE_OPTIONS \
+                -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+                -DCMAKE_CXX_COMPILER_LAUNCHER=ccache "
+fi
+
+# Build and install
+if [ $clean -eq 1 -a -e "$install_prefix" ]; then
+  rm -rf "$install_prefix"
+fi
+mkdir -p "$install_prefix/bin"
+
+if [ $clean -eq 1 -a -e "$build_prefix" ]; then
+  rm -rf "$build_prefix"
+fi
+
+mkdir -p "$build_prefix" && cd "$build_prefix"
+cmake $CMAKE_OPTIONS \
+      -DCOMPILER_RT_BUILD_SANITIZERS=on \
+      -DLLVM_ENABLE_PROJECTS="clang;compiler-rt;libunwind;lld;openmp;clang-tools-extra" \
+      -DLLVM_USE_LINKER=gold \
+      -DLLVM_LIT_ARGS="-sv -j$threads" \
+      -DLLVM_USE_SPLIT_DWARF=$split_dwarf \
+      -DCMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO="-Wl,--gdb-index -Wl,--compress-debug-sections=zlib"\
+      -DCMAKE_EXE_LINKER_FLAGS_DEBUG="-Wl,--gdb-index -Wl,--compress-debug-sections=zlib" \
+      $verbose \
+      ../llvm
+
+make -j$threads
+if [ $do_install == "1" ]; then
+  make -j$threads $verbose install
+fi
+
+if [ -n "$unit_test" ]; then
+  make -j$threads $verbose check-all
+fi
+
+cd ..
+
+echo "$0: SUCCESS"
diff --git a/compiler-rt/test/lsan/TestCases/Linux/log-path_test.cpp b/compiler-rt/test/lsan/TestCases/Linux/log-path_test.cpp
index e2f59ea693c4..ce782ecd34f4 100644
--- a/compiler-rt/test/lsan/TestCases/Linux/log-path_test.cpp
+++ b/compiler-rt/test/lsan/TestCases/Linux/log-path_test.cpp
@@ -13,6 +13,7 @@
 // RUN: cat %device_rundir/%t.log.* >> %t.log
 // RUN: %adb_shell 'cat %device_rundir/%t.log.*' >> %t.log
 // RUN: FileCheck %s --check-prefix=CHECK-ERROR < %t.log.*
+// REQUIRES: x86_64 
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/compiler-rt/test/lsan/TestCases/malloc_zero.c b/compiler-rt/test/lsan/TestCases/malloc_zero.c
index 5c8d1850a0f9..d6d2db6a62c0 100644
--- a/compiler-rt/test/lsan/TestCases/malloc_zero.c
+++ b/compiler-rt/test/lsan/TestCases/malloc_zero.c
@@ -3,6 +3,7 @@
 
 /// Fails when only leak sanitizer is enabled
 // UNSUPPORTED: arm-linux, armhf-linux
+// REQUIRES: x86_64
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/compiler-rt/test/lsan/TestCases/realloc_too_big.c b/compiler-rt/test/lsan/TestCases/realloc_too_big.c
index bb1316024b5c..ad0c0a78e154 100644
--- a/compiler-rt/test/lsan/TestCases/realloc_too_big.c
+++ b/compiler-rt/test/lsan/TestCases/realloc_too_big.c
@@ -3,6 +3,7 @@
 
 /// Fails when only leak sanitizer is enabled
 // UNSUPPORTED: arm-linux, armhf-linux
+// REQUIRES: x86_64
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/races/critical-unrelated.c b/openmp/tools/archer/tests/races/critical-unrelated.c
index bff8b9763c14..1a4d39a72362 100644
--- a/openmp/tools/archer/tests/races/critical-unrelated.c
+++ b/openmp/tools/archer/tests/races/critical-unrelated.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64
+
 #include <omp.h>
 #include <stdio.h>
 
diff --git a/openmp/tools/archer/tests/races/lock-nested-unrelated.c b/openmp/tools/archer/tests/races/lock-nested-unrelated.c
index e24b4cdedc71..13b6e3417458 100644
--- a/openmp/tools/archer/tests/races/lock-nested-unrelated.c
+++ b/openmp/tools/archer/tests/races/lock-nested-unrelated.c
@@ -13,6 +13,7 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64
 #include <omp.h>
 #include <stdio.h>
 
diff --git a/openmp/tools/archer/tests/races/lock-unrelated.c b/openmp/tools/archer/tests/races/lock-unrelated.c
index 4245490a703f..891549ff91ff 100644
--- a/openmp/tools/archer/tests/races/lock-unrelated.c
+++ b/openmp/tools/archer/tests/races/lock-unrelated.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64
+
 #include <omp.h>
 #include <stdio.h>
 
diff --git a/openmp/tools/archer/tests/races/parallel-simple.c b/openmp/tools/archer/tests/races/parallel-simple.c
index 700e4a7f3313..809ef6f4c114 100644
--- a/openmp/tools/archer/tests/races/parallel-simple.c
+++ b/openmp/tools/archer/tests/races/parallel-simple.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64
+
 #include <omp.h>
 #include <stdio.h>
 
diff --git a/openmp/tools/archer/tests/races/task-dependency.c b/openmp/tools/archer/tests/races/task-dependency.c
index f6496ac8596d..63b4cf4bfc56 100644
--- a/openmp/tools/archer/tests/races/task-dependency.c
+++ b/openmp/tools/archer/tests/races/task-dependency.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64
+
 #include "ompt/ompt-signal.h"
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c b/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c
index 04b2957b4863..770e31c8f831 100644
--- a/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c
+++ b/openmp/tools/archer/tests/races/task-taskgroup-unrelated.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64 
+
 #include "ompt/ompt-signal.h"
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/races/task-taskwait-nested.c b/openmp/tools/archer/tests/races/task-taskwait-nested.c
index 02f1fb576c87..64aa03f3f8db 100644
--- a/openmp/tools/archer/tests/races/task-taskwait-nested.c
+++ b/openmp/tools/archer/tests/races/task-taskwait-nested.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64 
+
 #include "ompt/ompt-signal.h"
 #include <omp.h>
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/races/task-two.c b/openmp/tools/archer/tests/races/task-two.c
index 06d5bc021ee6..f1fa654b2da9 100644
--- a/openmp/tools/archer/tests/races/task-two.c
+++ b/openmp/tools/archer/tests/races/task-two.c
@@ -13,6 +13,8 @@
 // RUN: %libarcher-compile-and-run-race | FileCheck %s
 // RUN: %libarcher-compile-and-run-race-noserial | FileCheck %s
 // REQUIRES: tsan
+// REQUIRES: x86_64 
+
 #include <omp.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/openmp/tools/archer/tests/task/task_late_fulfill.c b/openmp/tools/archer/tests/task/task_late_fulfill.c
index 31d096deec75..23f54eeb9b57 100644
--- a/openmp/tools/archer/tests/task/task_late_fulfill.c
+++ b/openmp/tools/archer/tests/task/task_late_fulfill.c
@@ -10,6 +10,7 @@
 // icc compiler does not support detach clause.
 // UNSUPPORTED: icc
 // REQUIRES: tsan
+// REQUIRES: x86_64
 
 #include <omp.h>
 #include <stdio.h>
-- 
Gitee


From ec80fbeeeee7cee2441bcd58f9ed3898073102f0 Mon Sep 17 00:00:00 2001
From: cf_zhao <zhaochuanfeng@huawei.com>
Date: Wed, 14 Dec 2022 10:32:07 +0800
Subject: [PATCH 41/41] [Build] Add several options to slim the install folder.

---
 build.sh | 46 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/build.sh b/build.sh
index 4fed7989a657..7b6b7908abbd 100755
--- a/build.sh
+++ b/build.sh
@@ -7,11 +7,15 @@ CXX_COMPILER_PATH=g++
 # Initialize our own variables:
 buildtype=RelWithDebInfo
 backends="ARM;AArch64;X86"
+enabled_projects="clang;lld;compiler-rt;openmp;clang-tools-extra"
+embedded_toolchain="0"
 split_dwarf=on
 use_ccache="0"
 do_install="0"
 clean=0
 unit_test=""
+install="install"
+install_toolchain_only="0"
 verbose=""
 dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
 build_dir_name="build"
@@ -38,16 +42,19 @@ usage() {
   cat <<EOF
 Usage: $0 [options]
 
-Build the compiler under $(get_relative_path $build_prefix), then install under $(get_relative_path $install_prefix) .
+Build the compiler under $build_prefix, then install under $install_prefix.
 
 Options:
   -b type  Specify CMake build type (default: $buildtype).
   -c       Use ccache (default: $use_ccache).
+  -e       Build for embedded cross tool chain.
   -h       Display this help message.
   -i       Install the build (default: $do_install).
   -I name  Specify install directory name (default: "$install_dir_name").
   -j N     Allow N jobs at once (default: $threads).
-  -r       Delete $(get_relative_path $install_prefix) and perform a clean build (default: incremental).
+  -o       Enable LLVM_INSTALL_TOOLCHAIN_ONLY=ON.
+  -r       Delete $install_prefix and perform a clean build (default: incremental).
+  -s       Strip binaries and minimize file permissions when (re-)installing.
   -t       Enable unit tests for components that support them (make check-all).
   -v       Enable verbose build output (default: quiet).
   -X archs Build only the specified semi-colon-delimited list of backends (default: "$backends").
@@ -56,7 +63,7 @@ EOF
 
 # Process command-line options. Remember the options for passing to the
 # containerized build script.
-while getopts :b:chiI:j:rtvX: optchr; do
+while getopts :b:cehiI:j:orstvX: optchr; do
   case "$optchr" in
     b)
       buildtype="$OPTARG"
@@ -75,6 +82,9 @@ while getopts :b:chiI:j:rtvX: optchr; do
     c)
       use_ccache="1"
       ;;
+    e)
+      embedded_toolchain="1"
+      ;;
     h)
       usage
       exit
@@ -89,9 +99,15 @@ while getopts :b:chiI:j:rtvX: optchr; do
     j)
       threads="$OPTARG"
       ;;
+    o)
+      install_toolchain_only=1
+      ;;
     r)
       clean=1
       ;;
+    s)
+      install="install/strip"
+      ;;
     t)
       unit_test=check-all
       ;;
@@ -120,7 +136,6 @@ CMAKE_OPTIONS="-DCMAKE_INSTALL_PREFIX=$install_prefix \
 
 # Warning: the -DLLVM_ENABLE_PROJECTS option is specified with cmake
 # to avoid issues with nested quotation marks
-
 if [ $use_ccache == "1" ]; then
   echo "Build using ccache"
   CMAKE_OPTIONS="$CMAKE_OPTIONS \
@@ -128,6 +143,18 @@ if [ $use_ccache == "1" ]; then
                 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache "
 fi
 
+if [ $embedded_toolchain == "1" ]; then
+  echo "Build for embedded cross tool chain"
+  enabled_projects="clang;lld;compiler-rt;"
+fi
+
+# When set LLVM_INSTALL_TOOLCHAIN_ONLY to On it removes many of the LLVM development
+# and testing tools as well as component libraries from the default install target.
+if [ $install_toolchain_only == "1" ]; then
+  echo "Only install toolchain"
+  CMAKE_OPTIONS="$CMAKE_OPTIONS -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON"
+fi
+
 # Build and install
 if [ $clean -eq 1 -a -e "$install_prefix" ]; then
   rm -rf "$install_prefix"
@@ -141,7 +168,8 @@ fi
 mkdir -p "$build_prefix" && cd "$build_prefix"
 cmake $CMAKE_OPTIONS \
       -DCOMPILER_RT_BUILD_SANITIZERS=on \
-      -DLLVM_ENABLE_PROJECTS="clang;compiler-rt;libunwind;lld;openmp;clang-tools-extra" \
+      -DLLVM_ENABLE_PROJECTS=$enabled_projects \
+      -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \
       -DLLVM_USE_LINKER=gold \
       -DLLVM_LIT_ARGS="-sv -j$threads" \
       -DLLVM_USE_SPLIT_DWARF=$split_dwarf \
@@ -152,7 +180,7 @@ cmake $CMAKE_OPTIONS \
 
 make -j$threads
 if [ $do_install == "1" ]; then
-  make -j$threads $verbose install
+  make -j$threads $verbose $install
 fi
 
 if [ -n "$unit_test" ]; then
@@ -161,4 +189,10 @@ fi
 
 cd ..
 
+# When building official deliverables, minimize file permissions under the
+# installation directory.
+if [ "$install" = "install/strip" ]; then
+  find $install_prefix -type f -exec chmod a-w,o-rx {} \;
+fi
+
 echo "$0: SUCCESS"
-- 
Gitee