From f8bdac1cdfa3b06a45bf77d9e00fc21e5843eddb Mon Sep 17 00:00:00 2001 From: Qingqing Li Date: Mon, 14 Jul 2025 10:50:49 +0800 Subject: [PATCH] backport patches from glibc upstream 2.38 branch This includes: malloc: cleanup casts in tst-calloc malloc: obscure calloc use in tst-calloc malloc: add indirection for malloc(-like) functions in tests [BZ #32366] nptl: PTHREAD_COND_INITIALIZER compatibility with pre-2.41 versions (bug 32786) nptl: Use all of g1_start and g_signals nptl: rename __condvar_quiesce_and_switch_g1 nptl: Fix indentation nptl: Use a single loop in pthread_cond_wait instaed of a nested loop nptl: Remove g_refs from condition variables nptl: Remove unnecessary quadruple check in pthread_cond_wait nptl: Remove unnecessary catch-all-wake in condvar group switch nptl: Update comments and indentation for new condvar implementation pthreads NPTL: lost wakeup fix 2 --- glibc.spec | 30 +- ...ection-for-malloc-like-functions-in-.patch | 187 ++++++++ malloc-cleanup-casts-in-tst-calloc.patch | 32 ++ malloc-obscure-calloc-use-in-tst-calloc.patch | 60 +++ nptl-Fix-indentation.patch | 148 ++++++ ...D_INITIALIZER-compatibility-with-pre.patch | 53 ++ ...move-g_refs-from-condition-variables.patch | 187 ++++++++ ...cessary-catch-all-wake-in-condvar-gr.patch | 78 +++ ...cessary-quadruple-check-in-pthread_c.patch | 118 +++++ ...ents-and-indentation-for-new-condvar.patch | 145 ++++++ ...e-loop-in-pthread_cond_wait-instaed-.patch | 102 ++++ nptl-Use-all-of-g1_start-and-g_signals.patch | 192 ++++++++ ...name-__condvar_quiesce_and_switch_g1.patch | 160 ++++++ pthreads-NPTL-lost-wakeup-fix-2.patch | 454 ++++++++++++++++++ 14 files changed, 1945 insertions(+), 1 deletion(-) create mode 100644 malloc-add-indirection-for-malloc-like-functions-in-.patch create mode 100644 malloc-cleanup-casts-in-tst-calloc.patch create mode 100644 malloc-obscure-calloc-use-in-tst-calloc.patch create mode 100644 nptl-Fix-indentation.patch create mode 100644 nptl-PTHREAD_COND_INITIALIZER-compatibility-with-pre.patch create mode 100644 nptl-Remove-g_refs-from-condition-variables.patch create mode 100644 nptl-Remove-unnecessary-catch-all-wake-in-condvar-gr.patch create mode 100644 nptl-Remove-unnecessary-quadruple-check-in-pthread_c.patch create mode 100644 nptl-Update-comments-and-indentation-for-new-condvar.patch create mode 100644 nptl-Use-a-single-loop-in-pthread_cond_wait-instaed-.patch create mode 100644 nptl-Use-all-of-g1_start-and-g_signals.patch create mode 100644 nptl-rename-__condvar_quiesce_and_switch_g1.patch create mode 100644 pthreads-NPTL-lost-wakeup-fix-2.patch diff --git a/glibc.spec b/glibc.spec index cf3a182..611d132 100644 --- a/glibc.spec +++ b/glibc.spec @@ -67,7 +67,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 62 +Release: 63 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -294,6 +294,19 @@ Patch204: x86_64-Fix-typo-in-ifunc-impl-list.c.patch Patch205: elf-Fix-subprocess-status-handling-for-tst-dlopen-sg.patch Patch206: support-Pick-group-in-support_capture_subprogram_sel.patch Patch207: Fix-error-reporting-false-negatives-in-SGID-tests.patch +Patch208: pthreads-NPTL-lost-wakeup-fix-2.patch +Patch209: nptl-Update-comments-and-indentation-for-new-condvar.patch +Patch210: nptl-Remove-unnecessary-catch-all-wake-in-condvar-gr.patch +Patch211: nptl-Remove-unnecessary-quadruple-check-in-pthread_c.patch +Patch212: nptl-Remove-g_refs-from-condition-variables.patch +Patch213: 
nptl-Use-a-single-loop-in-pthread_cond_wait-instaed-.patch +Patch214: nptl-Fix-indentation.patch +Patch215: nptl-rename-__condvar_quiesce_and_switch_g1.patch +Patch216: nptl-Use-all-of-g1_start-and-g_signals.patch +Patch217: nptl-PTHREAD_COND_INITIALIZER-compatibility-with-pre.patch +Patch218: malloc-add-indirection-for-malloc-like-functions-in-.patch +Patch219: malloc-obscure-calloc-use-in-tst-calloc.patch +Patch220: malloc-cleanup-casts-in-tst-calloc.patch #openEuler patch list Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch @@ -1519,6 +1532,21 @@ fi %endif %changelog +* Mon Jul 14 2025 Qingqing Li - 2.38-63 +- malloc: cleanup casts in tst-calloc +- malloc: obscure calloc use in tst-calloc +- malloc: add indirection for malloc(-like) functions in tests [BZ #32366] +- nptl: PTHREAD_COND_INITIALIZER compatibility with pre-2.41 versions (bug 32786) +- nptl: Use all of g1_start and g_signals +- nptl: rename __condvar_quiesce_and_switch_g1 +- nptl: Fix indentation +- nptl: Use a single loop in pthread_cond_wait instaed of a nested loop +- nptl: Remove g_refs from condition variables +- nptl: Remove unnecessary quadruple check in pthread_cond_wait +- nptl: Remove unnecessary catch-all-wake in condvar group switch +- nptl: Update comments and indentation for new condvar implementation +- pthreads NPTL: lost wakeup fix 2 + * Sat Jul 12 2025 Qingqing Li - 2.38-62 - aarch64: Using __memmove_generic when kunpeng920 with tsv120 micro architecture - aarch64: revert "aarch64: Use memcpy_simd as the default memcpy" diff --git a/malloc-add-indirection-for-malloc-like-functions-in-.patch b/malloc-add-indirection-for-malloc-like-functions-in-.patch new file mode 100644 index 0000000..1388b2b --- /dev/null +++ b/malloc-add-indirection-for-malloc-like-functions-in-.patch @@ -0,0 +1,187 @@ +From 879f0ee122477e2b08b7cabea104338d9a9198eb Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Mon, 9 Dec 2024 23:11:25 +0000 +Subject: [PATCH] malloc: add indirection for malloc(-like) functions in + tests [BZ #32366] + +GCC 15 introduces allocation dead code removal (DCE) for PR117370 in +r15-5255-g7828dc070510f8. This breaks various glibc tests which want +to assert various properties of the allocator without doing anything +obviously useful with the allocated memory. + +Alexander Monakov rightly pointed out that we can and should do better +than passing -fno-malloc-dce to paper over the problem. Not least because +GCC 14 already does such DCE where there's no testing of malloc's return +value against NULL, and LLVM has such optimisations too. + +Handle this by providing malloc (and friends) wrappers with a volatile +function pointer to obscure that we're calling malloc (et. al) from the +compiler. 
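As an editorial aside, the idiom this patch introduces in malloc/tst-malloc-aux.h can be illustrated with a minimal standalone sketch (this is not the glibc test itself, just a self-contained program using the same volatile-function-pointer trick): calling through a volatile pointer keeps the compiler from proving that the callee is malloc, so the allocation cannot be discarded as dead code.

#include <stdlib.h>

/* The volatile qualifier forces a genuine indirect call; the compiler can no
   longer treat this as a direct call to malloc and elide the allocation.  */
static void *(*volatile malloc_indirect) (size_t) = malloc;

int
main (void)
{
  void *p = malloc_indirect (64);   /* survives allocation dead-code removal */
  if (p == NULL)
    return 1;
  free (p);
  return 0;
}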
+ +Reviewed-by: Paul Eggert +(cherry picked from commit a9944a52c967ce76a5894c30d0274b824df43c7a) +--- + malloc/tst-aligned-alloc.c | 2 ++ + malloc/tst-compathooks-off.c | 2 ++ + malloc/tst-malloc-aux.h | 41 +++++++++++++++++++++++++++++++++++ + malloc/tst-malloc-check.c | 2 ++ + malloc/tst-malloc-too-large.c | 1 + + malloc/tst-malloc.c | 2 ++ + malloc/tst-realloc.c | 2 ++ + support/support.h | 2 +- + test-skeleton.c | 1 - + 9 files changed, 53 insertions(+), 2 deletions(-) + create mode 100644 malloc/tst-malloc-aux.h + +diff --git a/malloc/tst-aligned-alloc.c b/malloc/tst-aligned-alloc.c +index 8bd6527147..39f0cd22c0 100644 +--- a/malloc/tst-aligned-alloc.c ++++ b/malloc/tst-aligned-alloc.c +@@ -25,6 +25,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int + do_test (void) + { +diff --git a/malloc/tst-compathooks-off.c b/malloc/tst-compathooks-off.c +index 134d7ea107..cd8043d6f5 100644 +--- a/malloc/tst-compathooks-off.c ++++ b/malloc/tst-compathooks-off.c +@@ -25,6 +25,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + extern void (*volatile __free_hook) (void *, const void *); + extern void *(*volatile __malloc_hook)(size_t, const void *); + extern void *(*volatile __realloc_hook)(void *, size_t, const void *); +diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h +new file mode 100644 +index 0000000000..54908b4a24 +--- /dev/null ++++ b/malloc/tst-malloc-aux.h +@@ -0,0 +1,41 @@ ++/* Wrappers for malloc-like functions to allow testing the implementation ++ without optimization. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . 
*/ ++ ++#ifndef TST_MALLOC_AUX_H ++#define TST_MALLOC_AUX_H ++ ++#include ++#include ++ ++static void *(*volatile aligned_alloc_indirect)(size_t, size_t) = aligned_alloc; ++static void *(*volatile calloc_indirect)(size_t, size_t) = calloc; ++static void *(*volatile malloc_indirect)(size_t) = malloc; ++static void *(*volatile realloc_indirect)(void*, size_t) = realloc; ++ ++#undef aligned_alloc ++#undef calloc ++#undef malloc ++#undef realloc ++ ++#define aligned_alloc aligned_alloc_indirect ++#define calloc calloc_indirect ++#define malloc malloc_indirect ++#define realloc realloc_indirect ++ ++#endif /* TST_MALLOC_AUX_H */ +diff --git a/malloc/tst-malloc-check.c b/malloc/tst-malloc-check.c +index df9fd67c13..18c6f28f80 100644 +--- a/malloc/tst-malloc-check.c ++++ b/malloc/tst-malloc-check.c +@@ -20,6 +20,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int errors = 0; + + static void +diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c +index 5be6800b89..44979018b8 100644 +--- a/malloc/tst-malloc-too-large.c ++++ b/malloc/tst-malloc-too-large.c +@@ -43,6 +43,7 @@ + #include + #include + ++#include "tst-malloc-aux.h" + + /* This function prepares for each 'too-large memory allocation' test by + performing a small successful malloc/free and resetting errno prior to +diff --git a/malloc/tst-malloc.c b/malloc/tst-malloc.c +index 0da5a8f263..7b337e6e09 100644 +--- a/malloc/tst-malloc.c ++++ b/malloc/tst-malloc.c +@@ -20,6 +20,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int errors = 0; + + static void +diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c +index fb661d312e..b35a5a2125 100644 +--- a/malloc/tst-realloc.c ++++ b/malloc/tst-realloc.c +@@ -23,6 +23,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int + do_test (void) + { +diff --git a/support/support.h b/support/support.h +index b7f76bf080..3742f26b55 100644 +--- a/support/support.h ++++ b/support/support.h +@@ -113,7 +113,7 @@ void *xposix_memalign (size_t alignment, size_t n) + __attribute_malloc__ __attribute_alloc_align__ ((1)) + __attribute_alloc_size__ ((2)) __attr_dealloc_free __returns_nonnull; + char *xasprintf (const char *format, ...) +- __attribute__ ((format (printf, 1, 2), malloc)) __attr_dealloc_free ++ __attribute__ ((format (printf, 1, 2), __malloc__)) __attr_dealloc_free + __returns_nonnull; + char *xstrdup (const char *) __attr_dealloc_free __returns_nonnull; + char *xstrndup (const char *, size_t) __attr_dealloc_free __returns_nonnull; +diff --git a/test-skeleton.c b/test-skeleton.c +index efd52a6fa4..508522fa33 100644 +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -27,7 +27,6 @@ + #include + #include + #include +-#include + #include + #include + #include +-- +2.27.0 + diff --git a/malloc-cleanup-casts-in-tst-calloc.patch b/malloc-cleanup-casts-in-tst-calloc.patch new file mode 100644 index 0000000..b5e9708 --- /dev/null +++ b/malloc-cleanup-casts-in-tst-calloc.patch @@ -0,0 +1,32 @@ +From 21019afe6528f10b9a00e86df03a79a477037781 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Mon, 13 Jan 2025 02:27:41 +0000 +Subject: [PATCH] malloc: cleanup casts in tst-calloc + +Followup to c3d1dac96bdd10250aa37bb367d5ef8334a093a1. As pointed out by +Maciej W. Rozycki, the casts are obviously useless now. + +Reviewed-by: H.J. 
Lu +(cherry picked from commit fc8f253d808ade5e97c93b363bd1932023e770ba) +--- + malloc/tst-calloc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/malloc/tst-calloc.c b/malloc/tst-calloc.c +index d708a0fbeb..3d2e3d9fda 100644 +--- a/malloc/tst-calloc.c ++++ b/malloc/tst-calloc.c +@@ -106,8 +106,8 @@ null_test (void) + calloc (0, 0); + calloc (0, max_size); + calloc (max_size, 0); +- calloc (0, ~((size_t) zero_size)); +- calloc (~((size_t) zero_size), 0); ++ calloc (0, ~zero_size); ++ calloc (~zero_size, 0); + DIAG_POP_NEEDS_COMMENT; + } + +-- +2.27.0 + diff --git a/malloc-obscure-calloc-use-in-tst-calloc.patch b/malloc-obscure-calloc-use-in-tst-calloc.patch new file mode 100644 index 0000000..067ddba --- /dev/null +++ b/malloc-obscure-calloc-use-in-tst-calloc.patch @@ -0,0 +1,60 @@ +From a637f2c42ffa216571575f6cd8af6ebd8b9334db Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Fri, 10 Jan 2025 03:03:47 +0000 +Subject: [PATCH] malloc: obscure calloc use in tst-calloc + +Similar to a9944a52c967ce76a5894c30d0274b824df43c7a and +f9493a15ea9cfb63a815c00c23142369ec09d8ce, we need to hide calloc use from +the compiler to accommodate GCC's r15-6566-g804e9d55d9e54c change. + +First, include tst-malloc-aux.h, but then use `volatile` variables +for size. + +The test passes without the tst-malloc-aux.h change but IMO we want +it there for consistency and to avoid future problems (possibly silent). + +Reviewed-by: H.J. Lu +(cherry picked from commit c3d1dac96bdd10250aa37bb367d5ef8334a093a1) +--- + malloc/tst-calloc.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/malloc/tst-calloc.c b/malloc/tst-calloc.c +index 50b1d7d603..d708a0fbeb 100644 +--- a/malloc/tst-calloc.c ++++ b/malloc/tst-calloc.c +@@ -23,6 +23,7 @@ + #include + #include + ++#include "tst-malloc-aux.h" + + /* Number of samples per size. */ + #define N 50000 +@@ -94,16 +95,19 @@ random_test (void) + static void + null_test (void) + { ++ /* Obscure allocation size from the compiler. */ ++ volatile size_t max_size = UINT_MAX; ++ volatile size_t zero_size = 0; + /* If the size is 0 the result is implementation defined. Just make + sure the program doesn't crash. The result of calloc is + deliberately ignored, so do not warn about that. */ + DIAG_PUSH_NEEDS_COMMENT; + DIAG_IGNORE_NEEDS_COMMENT (10, "-Wunused-result"); + calloc (0, 0); +- calloc (0, UINT_MAX); +- calloc (UINT_MAX, 0); +- calloc (0, ~((size_t) 0)); +- calloc (~((size_t) 0), 0); ++ calloc (0, max_size); ++ calloc (max_size, 0); ++ calloc (0, ~((size_t) zero_size)); ++ calloc (~((size_t) zero_size), 0); + DIAG_POP_NEEDS_COMMENT; + } + +-- +2.27.0 + diff --git a/nptl-Fix-indentation.patch b/nptl-Fix-indentation.patch new file mode 100644 index 0000000..504687e --- /dev/null +++ b/nptl-Fix-indentation.patch @@ -0,0 +1,148 @@ +From 576565369755eb428ab34272ca18aba786bb7163 Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:47 -0700 +Subject: [PATCH] nptl: Fix indentation + +[BZ #25847] + +In my previous change I turned a nested loop into a simple loop. I'm doing +the resulting indentation changes in a separate commit to make the diff on +the previous commit easier to review. 
+ +(cherry picked from commit ee6c14ed59d480720721aaacc5fb03213dc153da) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_wait.c | 110 +++++++++++++++++++-------------------- + 1 file changed, 55 insertions(+), 55 deletions(-) + +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index b7f1d9c41d..d70012d647 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -383,65 +383,65 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + } + + +- while (1) +- { +- /* Now wait until a signal is available in our group or it is closed. +- Acquire MO so that if we observe (signals == lowseq) after group +- switching in __condvar_quiesce_and_switch_g1, we synchronize with that +- store and will see the prior update of __g1_start done while switching +- groups too. */ +- unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); +- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); +- unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; +- +- if (seq < (g1_start >> 1)) +- { +- /* If the group is closed already, +- then this waiter originally had enough extra signals to +- consume, up until the time its group was closed. */ +- break; +- } +- +- /* If there is an available signal, don't block. +- If __g1_start has advanced at all, then we must be in G1 +- by now, perhaps in the process of switching back to an older +- G2, but in either case we're allowed to consume the available +- signal and should not block anymore. */ +- if ((int)(signals - lowseq) >= 2) +- { +- /* Try to grab a signal. See above for MO. (if we do another loop +- iteration we need to see the correct value of g1_start) */ +- if (atomic_compare_exchange_weak_acquire ( +- cond->__data.__g_signals + g, ++ while (1) ++ { ++ /* Now wait until a signal is available in our group or it is closed. ++ Acquire MO so that if we observe (signals == lowseq) after group ++ switching in __condvar_quiesce_and_switch_g1, we synchronize with that ++ store and will see the prior update of __g1_start done while switching ++ groups too. */ ++ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); ++ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ ++ if (seq < (g1_start >> 1)) ++ { ++ /* If the group is closed already, ++ then this waiter originally had enough extra signals to ++ consume, up until the time its group was closed. */ ++ break; ++ } ++ ++ /* If there is an available signal, don't block. ++ If __g1_start has advanced at all, then we must be in G1 ++ by now, perhaps in the process of switching back to an older ++ G2, but in either case we're allowed to consume the available ++ signal and should not block anymore. */ ++ if ((int)(signals - lowseq) >= 2) ++ { ++ /* Try to grab a signal. See above for MO. (if we do another loop ++ iteration we need to see the correct value of g1_start) */ ++ if (atomic_compare_exchange_weak_acquire ( ++ cond->__data.__g_signals + g, + &signals, signals - 2)) +- break; +- else +- continue; +- } +- +- // Now block. 
+- struct _pthread_cleanup_buffer buffer; +- struct _condvar_cleanup_buffer cbuffer; +- cbuffer.wseq = wseq; +- cbuffer.cond = cond; +- cbuffer.mutex = mutex; +- cbuffer.private = private; +- __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); +- +- err = __futex_abstimed_wait_cancelable64 ( +- cond->__data.__g_signals + g, signals, clockid, abstime, private); +- +- __pthread_cleanup_pop (&buffer, 0); +- +- if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) +- { +- /* If we timed out, we effectively cancel waiting. */ +- __condvar_cancel_waiting (cond, seq, g, private); +- result = err; + break; +- } ++ else ++ continue; + } + ++ // Now block. ++ struct _pthread_cleanup_buffer buffer; ++ struct _condvar_cleanup_buffer cbuffer; ++ cbuffer.wseq = wseq; ++ cbuffer.cond = cond; ++ cbuffer.mutex = mutex; ++ cbuffer.private = private; ++ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); ++ ++ err = __futex_abstimed_wait_cancelable64 ( ++ cond->__data.__g_signals + g, signals, clockid, abstime, private); ++ ++ __pthread_cleanup_pop (&buffer, 0); ++ ++ if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) ++ { ++ /* If we timed out, we effectively cancel waiting. */ ++ __condvar_cancel_waiting (cond, seq, g, private); ++ result = err; ++ break; ++ } ++ } ++ + /* Confirm that we have been woken. We do that before acquiring the mutex + to allow for execution of pthread_cond_destroy while having acquired the + mutex. */ +-- +2.27.0 + diff --git a/nptl-PTHREAD_COND_INITIALIZER-compatibility-with-pre.patch b/nptl-PTHREAD_COND_INITIALIZER-compatibility-with-pre.patch new file mode 100644 index 0000000..74f91fe --- /dev/null +++ b/nptl-PTHREAD_COND_INITIALIZER-compatibility-with-pre.patch @@ -0,0 +1,53 @@ +From 51210d64966caa0091e9c9cd260f1be1b19f7f4d Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 11 Jul 2025 05:57:50 -0700 +Subject: [PATCH] nptl: PTHREAD_COND_INITIALIZER compatibility with + pre-2.41 versions (bug 32786) + +[BZ #25847] + +The new initializer and struct layout does not initialize the +__g_signals field in the old struct layout before the change in +commit c36fc50781995e6758cae2b6927839d0157f213c ("nptl: Remove +g_refs from condition variables"). Bring back fields at the end +of struct __pthread_cond_s, so that they are again zero-initialized. + +(cherry picked from commit dbc5a50d12eff4cb3f782129029d04b8a76f58e7) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + sysdeps/nptl/bits/thread-shared-types.h | 2 ++ + sysdeps/nptl/pthread.h | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h +index 59f9adc59a..3fe5d4afc0 100644 +--- a/sysdeps/nptl/bits/thread-shared-types.h ++++ b/sysdeps/nptl/bits/thread-shared-types.h +@@ -99,6 +99,8 @@ struct __pthread_cond_s + unsigned int __g1_orig_size; + unsigned int __wrefs; + unsigned int __g_signals[2]; ++ unsigned int __unused_initialized_1; ++ unsigned int __unused_initialized_2; + }; + + typedef unsigned int __tss_t; +diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h +index a43efa20d2..476cd0ed54 100644 +--- a/sysdeps/nptl/pthread.h ++++ b/sysdeps/nptl/pthread.h +@@ -152,7 +152,7 @@ enum + + + /* Conditional variable handling. 
*/ +-#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } } ++#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0}, 0, 0 } } + + + /* Cleanup buffers */ +-- +2.27.0 + diff --git a/nptl-Remove-g_refs-from-condition-variables.patch b/nptl-Remove-g_refs-from-condition-variables.patch new file mode 100644 index 0000000..fe2f28a --- /dev/null +++ b/nptl-Remove-g_refs-from-condition-variables.patch @@ -0,0 +1,187 @@ +From 7625579f1192a1b41e41db97e5bae937034fe52e Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:45 -0700 +Subject: [PATCH] nptl: Remove g_refs from condition variables + +[BZ #25847] + +This variable used to be needed to wait in group switching until all sleepers +have confirmed that they have woken. This is no longer needed. Nothing waits +on this variable so there is no need to track how many threads are currently +asleep in each group. + +(cherry picked from commit c36fc50781995e6758cae2b6927839d0157f213c) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_wait.c | 52 +------------------------ + nptl/tst-cond22.c | 12 +++--- + sysdeps/nptl/bits/thread-shared-types.h | 3 +- + sysdeps/nptl/pthread.h | 2 +- + 4 files changed, 9 insertions(+), 60 deletions(-) + +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index ab486664ff..4773859524 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -143,23 +143,6 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, + } + } + +-/* Wake up any signalers that might be waiting. */ +-static void +-__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) +-{ +- /* Release MO to synchronize-with the acquire load in +- __condvar_quiesce_and_switch_g1. */ +- if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) +- { +- /* Clear the wake-up request flag before waking up. We do not need more +- than relaxed MO and it doesn't matter if we apply this for an aliased +- group because we wake all futex waiters right after clearing the +- flag. */ +- atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); +- futex_wake (cond->__data.__g_refs + g, INT_MAX, private); +- } +-} +- + /* Clean-up for cancellation of waiters waiting for normal signals. We cancel + our registration as a waiter, confirm we have woken up, and re-acquire the + mutex. */ +@@ -171,8 +154,6 @@ __condvar_cleanup_waiting (void *arg) + pthread_cond_t *cond = cbuffer->cond; + unsigned g = cbuffer->wseq & 1; + +- __condvar_dec_grefs (cond, g, cbuffer->private); +- + __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); + /* FIXME With the current cancellation implementation, it is possible that + a thread is cancelled after it has returned from a syscall. This could +@@ -327,15 +308,6 @@ __condvar_cleanup_waiting (void *arg) + sufficient because if a waiter can see a sufficiently large value, it could + have also consume a signal in the waiters group. + +- It is essential that the last field in pthread_cond_t is __g_signals[1]: +- The previous condvar used a pointer-sized field in pthread_cond_t, so a +- PTHREAD_COND_INITIALIZER from that condvar implementation might only +- initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes +- in total instead of the 48 we need). 
__g_signals[1] is not accessed before +- the first group switch (G2 starts at index 0), which will set its value to +- zero after a harmless fetch-or whose return value is ignored. This +- effectively completes initialization. +- + + Limitations: + * This condvar isn't designed to allow for more than +@@ -440,21 +412,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + if ((int)(signals - lowseq) >= 2) + break; + +- /* No signals available after spinning, so prepare to block. +- We first acquire a group reference and use acquire MO for that so +- that we synchronize with the dummy read-modify-write in +- __condvar_quiesce_and_switch_g1 if we read from that. In turn, +- in this case this will make us see the advancement of __g_signals +- to the upcoming new g1_start that occurs with a concurrent +- attempt to reuse the group's slot. +- We use acquire MO for the __g_signals check to make the +- __g1_start check work (see spinning above). +- Note that the group reference acquisition will not mask the +- release MO when decrementing the reference count because we use +- an atomic read-modify-write operation and thus extend the release +- sequence. */ +- atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); +- + // Now block. + struct _pthread_cleanup_buffer buffer; + struct _condvar_cleanup_buffer cbuffer; +@@ -471,18 +428,11 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + + if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) + { +- __condvar_dec_grefs (cond, g, private); +- /* If we timed out, we effectively cancel waiting. Note that +- we have decremented __g_refs before cancellation, so that a +- deadlock between waiting for quiescence of our group in +- __condvar_quiesce_and_switch_g1 and us trying to acquire +- the lock during cancellation is not possible. */ ++ /* If we timed out, we effectively cancel waiting. */ + __condvar_cancel_waiting (cond, seq, g, private); + result = err; + goto done; + } +- else +- __condvar_dec_grefs (cond, g, private); + + /* Reload signals. See above for MO. 
*/ + signals = atomic_load_acquire (cond->__data.__g_signals + g); +diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c +index 1336e9c79d..bdcb45c536 100644 +--- a/nptl/tst-cond22.c ++++ b/nptl/tst-cond22.c +@@ -106,13 +106,13 @@ do_test (void) + status = 1; + } + +- printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", ++ printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", + c.__data.__wseq.__value32.__high, + c.__data.__wseq.__value32.__low, + c.__data.__g1_start.__value32.__high, + c.__data.__g1_start.__value32.__low, +- c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], +- c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], ++ c.__data.__g_signals[0], c.__data.__g_size[0], ++ c.__data.__g_signals[1], c.__data.__g_size[1], + c.__data.__g1_orig_size, c.__data.__wrefs); + + if (pthread_create (&th, NULL, tf, (void *) 1l) != 0) +@@ -152,13 +152,13 @@ do_test (void) + status = 1; + } + +- printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", ++ printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n", + c.__data.__wseq.__value32.__high, + c.__data.__wseq.__value32.__low, + c.__data.__g1_start.__value32.__high, + c.__data.__g1_start.__value32.__low, +- c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], +- c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], ++ c.__data.__g_signals[0], c.__data.__g_size[0], ++ c.__data.__g_signals[1], c.__data.__g_size[1], + c.__data.__g1_orig_size, c.__data.__wrefs); + + return status; +diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h +index 2de6ff9caf..59f9adc59a 100644 +--- a/sysdeps/nptl/bits/thread-shared-types.h ++++ b/sysdeps/nptl/bits/thread-shared-types.h +@@ -95,8 +95,7 @@ struct __pthread_cond_s + { + __atomic_wide_counter __wseq; + __atomic_wide_counter __g1_start; +- unsigned int __g_refs[2] __LOCK_ALIGNMENT; +- unsigned int __g_size[2]; ++ unsigned int __g_size[2] __LOCK_ALIGNMENT; + unsigned int __g1_orig_size; + unsigned int __wrefs; + unsigned int __g_signals[2]; +diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h +index 7f65483542..a43efa20d2 100644 +--- a/sysdeps/nptl/pthread.h ++++ b/sysdeps/nptl/pthread.h +@@ -152,7 +152,7 @@ enum + + + /* Conditional variable handling. */ +-#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } } ++#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } } + + + /* Cleanup buffers */ +-- +2.27.0 + diff --git a/nptl-Remove-unnecessary-catch-all-wake-in-condvar-gr.patch b/nptl-Remove-unnecessary-catch-all-wake-in-condvar-gr.patch new file mode 100644 index 0000000..d37d6bb --- /dev/null +++ b/nptl-Remove-unnecessary-catch-all-wake-in-condvar-gr.patch @@ -0,0 +1,78 @@ +From 1fa5e5189713e64fbd1bd775ab1cb6de1035eff9 Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:43 -0700 +Subject: [PATCH] nptl: Remove unnecessary catch-all-wake in condvar + group switch + +[BZ #25847] + +This wake is unnecessary. We only switch groups after every sleeper in a group +has been woken. Sure, they may take a while to actually wake up and may still +hold a reference, but waking them a second time doesn't speed that up. Instead +this just makes the code more complicated and may hide problems. 
+ +In particular this safety wake wouldn't even have helped with the bug that was +fixed by Barrus' patch: The bug there was that pthread_cond_signal would not +switch g1 when it should, so we wouldn't even have entered this code path. + +(cherry picked from commit b42cc6af11062c260c7dfa91f1c89891366fed3e) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_common.c | 31 +------------------------------ + 1 file changed, 1 insertion(+), 30 deletions(-) + +diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c +index c1eeec6a93..1c43432bc5 100644 +--- a/nptl/pthread_cond_common.c ++++ b/nptl/pthread_cond_common.c +@@ -221,13 +221,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + * New waiters arriving concurrently with the group switching will all go + into G2 until we atomically make the switch. Waiters existing in G2 + are not affected. +- * Waiters in G1 have already received a signal and been woken. If they +- haven't woken yet, they will be closed out immediately by the advancing +- of __g_signals to the next "lowseq" (low 31 bits of the new g1_start), +- which will prevent waiters from blocking using a futex on +- __g_signals since it provides enough signals for all possible +- remaining waiters. As a result, they can each consume a signal +- and they will eventually remove their group reference. */ ++ * Waiters in G1 have already received a signal and been woken. */ + + /* Update __g1_start, which finishes closing this group. The value we add + will never be negative because old_orig_size can only be zero when we +@@ -240,29 +234,6 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + + unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; + +- /* If any waiters still hold group references (and thus could be blocked), +- then wake them all up now and prevent any running ones from blocking. +- This is effectively a catch-all for any possible current or future +- bugs that can allow the group size to reach 0 before all G1 waiters +- have been awakened or at least given signals to consume, or any +- other case that can leave blocked (or about to block) older waiters.. */ +- if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0) +- { +- /* First advance signals to the end of the group (i.e. enough signals +- for the entire G1 group) to ensure that waiters which have not +- yet blocked in the futex will not block. +- Note that in the vast majority of cases, this should never +- actually be necessary, since __g_signals will have enough +- signals for the remaining g_refs waiters. As an optimization, +- we could check this first before proceeding, although that +- could still leave the potential for futex lost wakeup bugs +- if the signal count was non-zero but the futex wakeup +- was somehow lost. */ +- atomic_store_release (cond->__data.__g_signals + g1, lowseq); +- +- futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); +- } +- + /* At this point, the old G1 is now a valid new G2 (but not in use yet). + No old waiter can neither grab a signal nor acquire a reference without + noticing that __g1_start is larger. 
+-- +2.27.0 + diff --git a/nptl-Remove-unnecessary-quadruple-check-in-pthread_c.patch b/nptl-Remove-unnecessary-quadruple-check-in-pthread_c.patch new file mode 100644 index 0000000..f18050d --- /dev/null +++ b/nptl-Remove-unnecessary-quadruple-check-in-pthread_c.patch @@ -0,0 +1,118 @@ +From 44eaf0615d3feddfb0a0a0bb20adff2119dd44c4 Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:44 -0700 +Subject: [PATCH] nptl: Remove unnecessary quadruple check in + pthread_cond_wait + +[BZ #25847] + +pthread_cond_wait was checking whether it was in a closed group no less than +four times. Checking once is enough. Here are the four checks: + +1. While spin-waiting. This was dead code: maxspin is set to 0 and has been + for years. +2. Before deciding to go to sleep, and before incrementing grefs: I kept this +3. After incrementing grefs. There is no reason to think that the group would + close while we do an atomic increment. Obviously it could close at any + point, but that doesn't mean we have to recheck after every step. This + check was equally good as check 2, except it has to do more work. +4. When we find ourselves in a group that has a signal. We only get here after + we check that we're not in a closed group. There is no need to check again. + The check would only have helped in cases where the compare_exchange in the + next line would also have failed. Relying on the compare_exchange is fine. + +Removing the duplicate checks clarifies the code. + +(cherry picked from commit 4f7b051f8ee3feff1b53b27a906f245afaa9cee1) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_wait.c | 49 ---------------------------------------- + 1 file changed, 49 deletions(-) + +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index a01904c965..ab486664ff 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -366,7 +366,6 @@ static __always_inline int + __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + clockid_t clockid, const struct __timespec64 *abstime) + { +- const int maxspin = 0; + int err; + int result = 0; + +@@ -425,33 +424,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + +- /* Spin-wait first. +- Note that spinning first without checking whether a timeout +- passed might lead to what looks like a spurious wake-up even +- though we should return ETIMEDOUT (e.g., if the caller provides +- an absolute timeout that is clearly in the past). However, +- (1) spurious wake-ups are allowed, (2) it seems unlikely that a +- user will (ab)use pthread_cond_wait as a check for whether a +- point in time is in the past, and (3) spinning first without +- having to compare against the current time seems to be the right +- choice from a performance perspective for most use cases. */ +- unsigned int spin = maxspin; +- while (spin > 0 && ((int)(signals - lowseq) < 2)) +- { +- /* Check that we are not spinning on a group that's already +- closed. */ +- if (seq < (g1_start >> 1)) +- break; +- +- /* TODO Back off. */ +- +- /* Reload signals. See above for MO. */ +- signals = atomic_load_acquire (cond->__data.__g_signals + g); +- g1_start = __condvar_load_g1_start_relaxed (cond); +- lowseq = (g1_start & 1) == g ? 
signals : g1_start & ~1U; +- spin--; +- } +- + if (seq < (g1_start >> 1)) + { + /* If the group is closed already, +@@ -482,24 +454,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + an atomic read-modify-write operation and thus extend the release + sequence. */ + atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); +- signals = atomic_load_acquire (cond->__data.__g_signals + g); +- g1_start = __condvar_load_g1_start_relaxed (cond); +- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; +- +- if (seq < (g1_start >> 1)) +- { +- /* group is closed already, so don't block */ +- __condvar_dec_grefs (cond, g, private); +- goto done; +- } +- +- if ((int)(signals - lowseq) >= 2) +- { +- /* a signal showed up or G1/G2 switched after we grabbed the +- refcount */ +- __condvar_dec_grefs (cond, g, private); +- break; +- } + + // Now block. + struct _pthread_cleanup_buffer buffer; +@@ -533,9 +487,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + /* Reload signals. See above for MO. */ + signals = atomic_load_acquire (cond->__data.__g_signals + g); + } +- +- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) +- goto done; + } + /* Try to grab a signal. See above for MO. (if we do another loop + iteration we need to see the correct value of g1_start) */ +-- +2.27.0 + diff --git a/nptl-Update-comments-and-indentation-for-new-condvar.patch b/nptl-Update-comments-and-indentation-for-new-condvar.patch new file mode 100644 index 0000000..b3f7208 --- /dev/null +++ b/nptl-Update-comments-and-indentation-for-new-condvar.patch @@ -0,0 +1,145 @@ +From b5c4727e590ab9545d74b16f03338261870563bf Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:42 -0700 +Subject: [PATCH] nptl: Update comments and indentation for new condvar + implementation + +[BZ #25847] + +Some comments were wrong after the most recent commit. This fixes that. + +Also fixing indentation where it was using spaces instead of tabs. + +(cherry picked from commit 0cc973160c23bb67f895bc887dd6942d29f8fee3) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_common.c | 5 +++-- + nptl/pthread_cond_wait.c | 39 +++++++++++++++++++------------------- + 2 files changed, 22 insertions(+), 22 deletions(-) + +diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c +index d54f69ee16..c1eeec6a93 100644 +--- a/nptl/pthread_cond_common.c ++++ b/nptl/pthread_cond_common.c +@@ -221,8 +221,9 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + * New waiters arriving concurrently with the group switching will all go + into G2 until we atomically make the switch. Waiters existing in G2 + are not affected. +- * Waiters in G1 will be closed out immediately by the advancing of +- __g_signals to the next "lowseq" (low 31 bits of the new g1_start), ++ * Waiters in G1 have already received a signal and been woken. If they ++ haven't woken yet, they will be closed out immediately by the advancing ++ of __g_signals to the next "lowseq" (low 31 bits of the new g1_start), + which will prevent waiters from blocking using a futex on + __g_signals since it provides enough signals for all possible + remaining waiters. 
As a result, they can each consume a signal +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index 235813f685..a01904c965 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -249,7 +249,7 @@ __condvar_cleanup_waiting (void *arg) + figure out whether they are in a group that has already been completely + signaled (i.e., if the current G1 starts at a later position that the + waiter's position). Waiters cannot determine whether they are currently +- in G2 or G1 -- but they do not have too because all they are interested in ++ in G2 or G1 -- but they do not have to because all they are interested in + is whether there are available signals, and they always start in G2 (whose + group slot they know because of the bit in the waiter sequence. Signalers + will simply fill the right group until it is completely signaled and can +@@ -412,7 +412,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + } + + /* Now wait until a signal is available in our group or it is closed. +- Acquire MO so that if we observe a value of zero written after group ++ Acquire MO so that if we observe (signals == lowseq) after group + switching in __condvar_quiesce_and_switch_g1, we synchronize with that + store and will see the prior update of __g1_start done while switching + groups too. */ +@@ -422,8 +422,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + { + while (1) + { +- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); +- unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); ++ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + + /* Spin-wait first. + Note that spinning first without checking whether a timeout +@@ -447,21 +447,21 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + + /* Reload signals. See above for MO. */ + signals = atomic_load_acquire (cond->__data.__g_signals + g); +- g1_start = __condvar_load_g1_start_relaxed (cond); +- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ g1_start = __condvar_load_g1_start_relaxed (cond); ++ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + spin--; + } + +- if (seq < (g1_start >> 1)) ++ if (seq < (g1_start >> 1)) + { +- /* If the group is closed already, ++ /* If the group is closed already, + then this waiter originally had enough extra signals to + consume, up until the time its group was closed. */ + goto done; +- } ++ } + + /* If there is an available signal, don't block. +- If __g1_start has advanced at all, then we must be in G1 ++ If __g1_start has advanced at all, then we must be in G1 + by now, perhaps in the process of switching back to an older + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ +@@ -483,22 +483,23 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + sequence. */ + atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); + signals = atomic_load_acquire (cond->__data.__g_signals + g); +- g1_start = __condvar_load_g1_start_relaxed (cond); +- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ g1_start = __condvar_load_g1_start_relaxed (cond); ++ lowseq = (g1_start & 1) == g ? 
signals : g1_start & ~1U; + +- if (seq < (g1_start >> 1)) ++ if (seq < (g1_start >> 1)) + { +- /* group is closed already, so don't block */ ++ /* group is closed already, so don't block */ + __condvar_dec_grefs (cond, g, private); + goto done; + } + + if ((int)(signals - lowseq) >= 2) + { +- /* a signal showed up or G1/G2 switched after we grabbed the refcount */ ++ /* a signal showed up or G1/G2 switched after we grabbed the ++ refcount */ + __condvar_dec_grefs (cond, g, private); + break; +- } ++ } + + // Now block. + struct _pthread_cleanup_buffer buffer; +@@ -536,10 +537,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) + goto done; + } +- /* Try to grab a signal. Use acquire MO so that we see an up-to-date value +- of __g1_start below (see spinning above for a similar case). In +- particular, if we steal from a more recent group, we will also see a +- more recent __g1_start below. */ ++ /* Try to grab a signal. See above for MO. (if we do another loop ++ iteration we need to see the correct value of g1_start) */ + while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, + &signals, signals - 2)); + +-- +2.27.0 + diff --git a/nptl-Use-a-single-loop-in-pthread_cond_wait-instaed-.patch b/nptl-Use-a-single-loop-in-pthread_cond_wait-instaed-.patch new file mode 100644 index 0000000..0c94f4b --- /dev/null +++ b/nptl-Use-a-single-loop-in-pthread_cond_wait-instaed-.patch @@ -0,0 +1,102 @@ +From 6bac834c5a44e6f72e0c7087e8b57b006beef816 Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:46 -0700 +Subject: [PATCH] nptl: Use a single loop in pthread_cond_wait instaed of + a nested loop + +[BZ #25847] + +The loop was a little more complicated than necessary. There was only one +break statement out of the inner loop, and the outer loop was nearly empty. +So just remove the outer loop, moving its code to the one break statement in +the inner loop. This allows us to replace all gotos with break statements. + +(cherry picked from commit 929a4764ac90382616b6a21f099192b2475da674) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_wait.c | 41 +++++++++++++++++++--------------------- + 1 file changed, 19 insertions(+), 22 deletions(-) + +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index 4773859524..b7f1d9c41d 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -382,17 +382,15 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + return err; + } + +- /* Now wait until a signal is available in our group or it is closed. +- Acquire MO so that if we observe (signals == lowseq) after group +- switching in __condvar_quiesce_and_switch_g1, we synchronize with that +- store and will see the prior update of __g1_start done while switching +- groups too. */ +- unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); +- +- do +- { ++ + while (1) + { ++ /* Now wait until a signal is available in our group or it is closed. ++ Acquire MO so that if we observe (signals == lowseq) after group ++ switching in __condvar_quiesce_and_switch_g1, we synchronize with that ++ store and will see the prior update of __g1_start done while switching ++ groups too. */ ++ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? 
signals : g1_start & ~1U; + +@@ -401,7 +399,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + /* If the group is closed already, + then this waiter originally had enough extra signals to + consume, up until the time its group was closed. */ +- goto done; ++ break; + } + + /* If there is an available signal, don't block. +@@ -410,7 +408,16 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ + if ((int)(signals - lowseq) >= 2) +- break; ++ { ++ /* Try to grab a signal. See above for MO. (if we do another loop ++ iteration we need to see the correct value of g1_start) */ ++ if (atomic_compare_exchange_weak_acquire ( ++ cond->__data.__g_signals + g, ++ &signals, signals - 2)) ++ break; ++ else ++ continue; ++ } + + // Now block. + struct _pthread_cleanup_buffer buffer; +@@ -431,19 +438,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + /* If we timed out, we effectively cancel waiting. */ + __condvar_cancel_waiting (cond, seq, g, private); + result = err; +- goto done; ++ break; + } +- +- /* Reload signals. See above for MO. */ +- signals = atomic_load_acquire (cond->__data.__g_signals + g); + } +- } +- /* Try to grab a signal. See above for MO. (if we do another loop +- iteration we need to see the correct value of g1_start) */ +- while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, +- &signals, signals - 2)); +- +- done: + + /* Confirm that we have been woken. We do that before acquiring the mutex + to allow for execution of pthread_cond_destroy while having acquired the +-- +2.27.0 + diff --git a/nptl-Use-all-of-g1_start-and-g_signals.patch b/nptl-Use-all-of-g1_start-and-g_signals.patch new file mode 100644 index 0000000..c0a1b27 --- /dev/null +++ b/nptl-Use-all-of-g1_start-and-g_signals.patch @@ -0,0 +1,192 @@ +From 39a80f40359e2b7b778c2c42cabf3d16765a101b Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:49 -0700 +Subject: [PATCH] nptl: Use all of g1_start and g_signals + +[BZ #25847] + +The LSB of g_signals was unused. The LSB of g1_start was used to indicate +which group is G2. This was used to always go to sleep in pthread_cond_wait +if a waiter is in G2. A comment earlier in the file says that this is not +correct to do: + + "Waiters cannot determine whether they are currently in G2 or G1 -- but they + do not have to because all they are interested in is whether there are + available signals" + +I either would have had to update the comment, or get rid of the check. I +chose to get rid of the check. In fact I don't quite know why it was there. +There will never be available signals for group G2, so we didn't need the +special case. Even if there were, this would just be a spurious wake. This +might have caught some cases where the count has wrapped around, but it +wouldn't reliably do that, (and even if it did, why would you want to force a +sleep in that case?) and we don't support that many concurrent waiters +anyway. Getting rid of it allows us to use one more bit, making us more +robust to wraparound. 
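The wrap-around robustness this commit message claims rests on comparing the signal word against g1_start relatively, with a signed difference. A small standalone sketch demonstrates the property (the values are hypothetical and only illustrate the arithmetic, they are not taken from the condvar code):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* Hypothetical base just below the 32-bit wrap point.  */
  uint32_t g1_start = UINT32_MAX - 1;
  /* Adding three pending signals on top of the base wraps the counter...  */
  uint32_t signals = g1_start + 3;
  /* ...yet the signed relative comparison still reports signals available.  */
  assert ((int32_t) (signals - g1_start) > 0);
  return 0;
}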
+ +(cherry picked from commit 91bb902f58264a2fd50fbce8f39a9a290dd23706) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_broadcast.c | 4 ++-- + nptl/pthread_cond_common.c | 26 ++++++++++---------------- + nptl/pthread_cond_signal.c | 2 +- + nptl/pthread_cond_wait.c | 14 +++++--------- + 4 files changed, 18 insertions(+), 28 deletions(-) + +diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c +index f77198f4b1..f5793e715f 100644 +--- a/nptl/pthread_cond_broadcast.c ++++ b/nptl/pthread_cond_broadcast.c +@@ -57,7 +57,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) + { + /* Add as many signals as the remaining size of the group. */ + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, +- cond->__data.__g_size[g1] << 1); ++ cond->__data.__g_size[g1]); + cond->__data.__g_size[g1] = 0; + + /* We need to wake G1 waiters before we switch G1 below. */ +@@ -73,7 +73,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) + { + /* Step (3): Send signals to all waiters in the old G2 / new G1. */ + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, +- cond->__data.__g_size[g1] << 1); ++ cond->__data.__g_size[g1]); + cond->__data.__g_size[g1] = 0; + /* TODO Only set it if there are indeed futex waiters. */ + do_futex_wake = true; +diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c +index 64531e809f..99640968f1 100644 +--- a/nptl/pthread_cond_common.c ++++ b/nptl/pthread_cond_common.c +@@ -208,9 +208,9 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + behavior. + Note that this works correctly for a zero-initialized condvar too. */ + unsigned int old_orig_size = __condvar_get_orig_size (cond); +- uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; +- if (((unsigned) (wseq - old_g1_start - old_orig_size) +- + cond->__data.__g_size[g1 ^ 1]) == 0) ++ uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond); ++ uint64_t new_g1_start = old_g1_start + old_orig_size; ++ if (((unsigned) (wseq - new_g1_start) + cond->__data.__g_size[g1 ^ 1]) == 0) + return false; + + /* We have to consider the following kinds of waiters: +@@ -221,16 +221,10 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + are not affected. + * Waiters in G1 have already received a signal and been woken. */ + +- /* Update __g1_start, which closes this group. The value we add will never +- be negative because old_orig_size can only be zero when we switch groups +- the first time after a condvar was initialized, in which case G1 will be +- at index 1 and we will add a value of 1. Relaxed MO is fine because the +- change comes with no additional constraints that others would have to +- observe. */ +- __condvar_add_g1_start_relaxed (cond, +- (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); +- +- unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; ++ /* Update __g1_start, which closes this group. Relaxed MO is fine because ++ the change comes with no additional constraints that others would have ++ to observe. */ ++ __condvar_add_g1_start_relaxed (cond, old_orig_size); + + /* At this point, the old G1 is now a valid new G2 (but not in use yet). 
+ No old waiter can neither grab a signal nor acquire a reference without +@@ -242,13 +236,13 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + g1 ^= 1; + *g1index ^= 1; + +- /* Now advance the new G1 g_signals to the new lowseq, giving it ++ /* Now advance the new G1 g_signals to the new g1_start, giving it + an effective signal count of 0 to start. */ +- atomic_store_release (cond->__data.__g_signals + g1, lowseq); ++ atomic_store_release (cond->__data.__g_signals + g1, (unsigned)new_g1_start); + + /* These values are just observed by signalers, and thus protected by the + lock. */ +- unsigned int orig_size = wseq - (old_g1_start + old_orig_size); ++ unsigned int orig_size = wseq - new_g1_start; + __condvar_set_orig_size (cond, orig_size); + /* Use and addition to not loose track of cancellations in what was + previously G2. */ +diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c +index 97316b2579..bdc1e82466 100644 +--- a/nptl/pthread_cond_signal.c ++++ b/nptl/pthread_cond_signal.c +@@ -80,7 +80,7 @@ ___pthread_cond_signal (pthread_cond_t *cond) + release-MO store when initializing a group in __condvar_switch_g1 + because we use an atomic read-modify-write and thus extend that + store's release sequence. */ +- atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); ++ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 1); + cond->__data.__g_size[g1]--; + /* TODO Only set it if there are indeed futex waiters. */ + do_futex_wake = true; +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index 6f5277b9df..d919b3622a 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -84,7 +84,7 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, + not hold a reference on the group. */ + __condvar_acquire_lock (cond, private); + +- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + if (g1_start > seq) + { + /* Our group is closed, so someone provided enough signals for it. +@@ -259,7 +259,6 @@ __condvar_cleanup_waiting (void *arg) + * Waiters fetch-add while having acquire the mutex associated with the + condvar. Signalers load it and fetch-xor it concurrently. + __g1_start: Starting position of G1 (inclusive) +- * LSB is index of current G2. + * Modified by signalers while having acquired the condvar-internal lock + and observed concurrently by waiters. + __g1_orig_size: Initial size of G1 +@@ -280,11 +279,9 @@ __condvar_cleanup_waiting (void *arg) + * Reference count used by waiters concurrently with signalers that have + acquired the condvar-internal lock. + __g_signals: The number of signals that can still be consumed, relative to +- the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits +- 31 to 1 of g1_start with the signal count added) ++ the current g1_start. (i.e. g1_start with the signal count added) + * Used as a futex word by waiters. Used concurrently by waiters and + signalers. +- * LSB is currently reserved and 0. + __g_size: Waiters remaining in this group (i.e., which have not been + signaled yet. + * Accessed by signalers and waiters that cancel waiting (both do so only +@@ -391,9 +388,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + too. */ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); +- unsigned int lowseq = (g1_start & 1) == g ? 
signals : g1_start & ~1U; + +- if (seq < (g1_start >> 1)) ++ if (seq < g1_start) + { + /* If the group is closed already, + then this waiter originally had enough extra signals to +@@ -406,13 +402,13 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + by now, perhaps in the process of switching back to an older + G2, but in either case we're allowed to consume the available + signal and should not block anymore. */ +- if ((int)(signals - lowseq) >= 2) ++ if ((int)(signals - (unsigned int)g1_start) > 0) + { + /* Try to grab a signal. See above for MO. (if we do another loop + iteration we need to see the correct value of g1_start) */ + if (atomic_compare_exchange_weak_acquire ( + cond->__data.__g_signals + g, +- &signals, signals - 2)) ++ &signals, signals - 1)) + break; + else + continue; +-- +2.27.0 + diff --git a/nptl-rename-__condvar_quiesce_and_switch_g1.patch b/nptl-rename-__condvar_quiesce_and_switch_g1.patch new file mode 100644 index 0000000..7cf0f28 --- /dev/null +++ b/nptl-rename-__condvar_quiesce_and_switch_g1.patch @@ -0,0 +1,160 @@ +From 8899e89b29797fca151bc31ac26e2d1cbdffaa21 Mon Sep 17 00:00:00 2001 +From: Malte Skarupke +Date: Fri, 11 Jul 2025 05:57:48 -0700 +Subject: [PATCH] nptl: rename __condvar_quiesce_and_switch_g1 + +[BZ #25847] + +This function no longer waits for threads to leave g1, so rename it to +__condvar_switch_g1 + +(cherry picked from commit 4b79e27a5073c02f6bff9aa8f4791230a0ab1867) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_broadcast.c | 4 ++-- + nptl/pthread_cond_common.c | 26 ++++++++++++-------------- + nptl/pthread_cond_signal.c | 17 ++++++++--------- + nptl/pthread_cond_wait.c | 9 ++++----- + 4 files changed, 26 insertions(+), 30 deletions(-) + +diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c +index 7409958394..f77198f4b1 100644 +--- a/nptl/pthread_cond_broadcast.c ++++ b/nptl/pthread_cond_broadcast.c +@@ -60,7 +60,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) + cond->__data.__g_size[g1] << 1); + cond->__data.__g_size[g1] = 0; + +- /* We need to wake G1 waiters before we quiesce G1 below. */ ++ /* We need to wake G1 waiters before we switch G1 below. */ + /* TODO Only set it if there are indeed futex waiters. We could + also try to move this out of the critical section in cases when + G2 is empty (and we don't need to quiesce). */ +@@ -69,7 +69,7 @@ ___pthread_cond_broadcast (pthread_cond_t *cond) + + /* G1 is complete. Step (2) is next unless there are no waiters in G2, in + which case we can stop. */ +- if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) ++ if (__condvar_switch_g1 (cond, wseq, &g1, private)) + { + /* Step (3): Send signals to all waiters in the old G2 / new G1. */ + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, +diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c +index 1c43432bc5..64531e809f 100644 +--- a/nptl/pthread_cond_common.c ++++ b/nptl/pthread_cond_common.c +@@ -189,16 +189,15 @@ __condvar_get_private (int flags) + return FUTEX_SHARED; + } + +-/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to +- leave G1, converts G1 into a fresh G2, and then switches group roles so that +- the former G2 becomes the new G1 ending at the current __wseq value when we +- eventually make the switch (WSEQ is just an observation of __wseq by the +- signaler). 
++/* This closes G1 (whose index is in G1INDEX), converts G1 into a fresh G2, ++ and then switches group roles so that the former G2 becomes the new G1 ++ ending at the current __wseq value when we eventually make the switch ++ (WSEQ is just an observation of __wseq by the signaler). + If G2 is empty, it will not switch groups because then it would create an + empty G1 which would require switching groups again on the next signal. + Returns false iff groups were not switched because G2 was empty. */ + static bool __attribute__ ((unused)) +-__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, ++__condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + unsigned int *g1index, int private) + { + unsigned int g1 = *g1index; +@@ -214,8 +213,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + + cond->__data.__g_size[g1 ^ 1]) == 0) + return false; + +- /* Now try to close and quiesce G1. We have to consider the following kinds +- of waiters: ++ /* We have to consider the following kinds of waiters: + * Waiters from less recent groups than G1 are not affected because + nothing will change for them apart from __g1_start getting larger. + * New waiters arriving concurrently with the group switching will all go +@@ -223,12 +221,12 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + are not affected. + * Waiters in G1 have already received a signal and been woken. */ + +- /* Update __g1_start, which finishes closing this group. The value we add +- will never be negative because old_orig_size can only be zero when we +- switch groups the first time after a condvar was initialized, in which +- case G1 will be at index 1 and we will add a value of 1. +- Relaxed MO is fine because the change comes with no additional +- constraints that others would have to observe. */ ++ /* Update __g1_start, which closes this group. The value we add will never ++ be negative because old_orig_size can only be zero when we switch groups ++ the first time after a condvar was initialized, in which case G1 will be ++ at index 1 and we will add a value of 1. Relaxed MO is fine because the ++ change comes with no additional constraints that others would have to ++ observe. */ + __condvar_add_g1_start_relaxed (cond, + (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); + +diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c +index 9ac4bd9f75..97316b2579 100644 +--- a/nptl/pthread_cond_signal.c ++++ b/nptl/pthread_cond_signal.c +@@ -69,18 +69,17 @@ ___pthread_cond_signal (pthread_cond_t *cond) + bool do_futex_wake = false; + + /* If G1 is still receiving signals, we put the signal there. If not, we +- check if G2 has waiters, and if so, quiesce and switch G1 to the former +- G2; if this results in a new G1 with waiters (G2 might have cancellations +- already, see __condvar_quiesce_and_switch_g1), we put the signal in the +- new G1. */ ++ check if G2 has waiters, and if so, switch G1 to the former G2; if this ++ results in a new G1 with waiters (G2 might have cancellations already, ++ see __condvar_switch_g1), we put the signal in the new G1. */ + if ((cond->__data.__g_size[g1] != 0) +- || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) ++ || __condvar_switch_g1 (cond, wseq, &g1, private)) + { + /* Add a signal. Relaxed MO is fine because signaling does not need to +- establish a happens-before relation (see above). 
We do not mask the +- release-MO store when initializing a group in +- __condvar_quiesce_and_switch_g1 because we use an atomic +- read-modify-write and thus extend that store's release sequence. */ ++ establish a happens-before relation (see above). We do not mask the ++ release-MO store when initializing a group in __condvar_switch_g1 ++ because we use an atomic read-modify-write and thus extend that ++ store's release sequence. */ + atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); + cond->__data.__g_size[g1]--; + /* TODO Only set it if there are indeed futex waiters. */ +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index d70012d647..6f5277b9df 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -354,8 +354,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + because we do not need to establish any happens-before relation with + signalers (see __pthread_cond_signal); modification order alone + establishes a total order of waiters/signals. We do need acquire MO +- to synchronize with group reinitialization in +- __condvar_quiesce_and_switch_g1. */ ++ to synchronize with group reinitialization in __condvar_switch_g1. */ + uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); + /* Find our group's index. We always go into what was G2 when we acquired + our position. */ +@@ -387,9 +386,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + { + /* Now wait until a signal is available in our group or it is closed. + Acquire MO so that if we observe (signals == lowseq) after group +- switching in __condvar_quiesce_and_switch_g1, we synchronize with that +- store and will see the prior update of __g1_start done while switching +- groups too. */ ++ switching in __condvar_switch_g1, we synchronize with that store and ++ will see the prior update of __g1_start done while switching groups ++ too. */ + unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); + unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; +-- +2.27.0 + diff --git a/pthreads-NPTL-lost-wakeup-fix-2.patch b/pthreads-NPTL-lost-wakeup-fix-2.patch new file mode 100644 index 0000000..a7de0cc --- /dev/null +++ b/pthreads-NPTL-lost-wakeup-fix-2.patch @@ -0,0 +1,454 @@ +From 1a0d73a625877a1a759975613a4f10bb341c66d4 Mon Sep 17 00:00:00 2001 +From: Frank Barrus +Date: Fri, 11 Jul 2025 05:57:41 -0700 +Subject: [PATCH] pthreads NPTL: lost wakeup fix 2 + +[BZ #25847] + +This fixes the lost wakeup (from a bug in signal stealing) with a change +in the usage of g_signals[] in the condition variable internal state. +It also completely eliminates the concept and handling of signal stealing, +as well as the need for signalers to block to wait for waiters to wake +up every time there is a G1/G2 switch. This greatly reduces the average +and maximum latency for pthread_cond_signal. + +The g_signals[] field now contains a signal count that is relative to +the current g1_start value. Since it is a 32-bit field, and the LSB is +still reserved (though not currently used anymore), it has a 31-bit value +that corresponds to the low 31 bits of the sequence number in g1_start. 
+
+(since g1_start also has an LSB flag, this means bits 31:1 in g_signals
+correspond to bits 31:1 in g1_start, plus the current signal count)
+
+By making the signal count relative to g1_start, there is no longer
+any ambiguity or A/B/A issue, and thus any checks before blocking,
+including the futex call itself, are guaranteed not to block if the G1/G2
+switch occurs, even if the signal count remains the same. This allows
+initially safely blocking in G2 until the switch to G1 occurs, and
+then transitioning from G1 to a new G1 or G2, and always being able to
+distinguish the state change. This removes the race condition and A/B/A
+problems that otherwise occurred if a late (pre-empted) waiter were to
+resume just as the futex call attempted to block on g_signal since
+otherwise there was no last opportunity to re-check things like whether
+the current G1 group was already closed.
+
+By fixing these issues, the signal stealing code can be eliminated,
+since there is no concept of signal stealing anymore. The code to block
+for all waiters to exit g_refs can also be removed, since any waiters
+that are still in the g_refs region can be guaranteed to safely wake
+up and exit. If there are still any left at this time, they are all
+sent one final futex wakeup to ensure that they are not blocked any
+longer, but there is no need for the signaller to block and wait for
+them to wake up and exit the g_refs region.
+
+The signal count is then effectively "zeroed" but since it is now
+relative to g1_start, this is done by advancing it to a new value that
+can be observed by any pending blocking waiters. Any late waiters can
+always tell the difference, and can thus just cleanly exit if they are
+in a stale G1 or G2. They can never steal a signal from the current
+G1 if they are not in the current G1, since the signal value that has
+to match in the cmpxchg has the low 31 bits of the g1_start value
+contained in it, and that's first checked, and then it won't match if
+there's a G1/G2 change.
+
+Note: the 31-bit sequence number used in g_signals is designed to
+handle wrap-around when checking the signal count, but if the entire
+31-bit wraparound (2 billion signals) occurs while there is still a
+late waiter that has not yet resumed, and it happens to then match
+the current g1_start low bits, and the pre-emption occurs after the
+normal "closed group" checks (which are 64-bit) but then hits the
+futex syscall and signal consuming code, then an A/B/A issue could
+still result and cause an incorrect assumption about whether it
+should block. This particular scenario seems unlikely in practice.
+Note that once awake from the futex, the waiter would notice the
+closed group before consuming the signal (since that's still a 64-bit
+check that would not be aliased in the wrap-around in g_signals),
+so the biggest impact would be blocking on the futex until the next
+full wakeup from a G1/G2 switch.
+ +(cherry picked from commit 1db84775f831a1494993ce9c118deaf9537cc50a) + +Signed-off-by: Sunil Dora +Tested-by: Carlos O'Donell +Reviewed-by: Carlos O'Donell +--- + nptl/pthread_cond_common.c | 105 +++++++++------------------ + nptl/pthread_cond_wait.c | 144 ++++++++++++------------------------- + 2 files changed, 81 insertions(+), 168 deletions(-) + +diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c +index 7440bb18e6..d54f69ee16 100644 +--- a/nptl/pthread_cond_common.c ++++ b/nptl/pthread_cond_common.c +@@ -201,7 +201,6 @@ static bool __attribute__ ((unused)) + __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + unsigned int *g1index, int private) + { +- const unsigned int maxspin = 0; + unsigned int g1 = *g1index; + + /* If there is no waiter in G2, we don't do anything. The expression may +@@ -222,84 +221,46 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + * New waiters arriving concurrently with the group switching will all go + into G2 until we atomically make the switch. Waiters existing in G2 + are not affected. +- * Waiters in G1 will be closed out immediately by setting a flag in +- __g_signals, which will prevent waiters from blocking using a futex on +- __g_signals and also notifies them that the group is closed. As a +- result, they will eventually remove their group reference, allowing us +- to close switch group roles. */ +- +- /* First, set the closed flag on __g_signals. This tells waiters that are +- about to wait that they shouldn't do that anymore. This basically +- serves as an advance notification of the upcoming change to __g1_start; +- waiters interpret it as if __g1_start was larger than their waiter +- sequence position. This allows us to change __g1_start after waiting +- for all existing waiters with group references to leave, which in turn +- makes recovery after stealing a signal simpler because it then can be +- skipped if __g1_start indicates that the group is closed (otherwise, +- we would have to recover always because waiters don't know how big their +- groups are). Relaxed MO is fine. */ +- atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1); +- +- /* Wait until there are no group references anymore. The fetch-or operation +- injects us into the modification order of __g_refs; release MO ensures +- that waiters incrementing __g_refs after our fetch-or see the previous +- changes to __g_signals and to __g1_start that had to happen before we can +- switch this G1 and alias with an older group (we have two groups, so +- aliasing requires switching group roles twice). Note that nobody else +- can have set the wake-request flag, so we do not have to act upon it. +- +- Also note that it is harmless if older waiters or waiters from this G1 +- get a group reference after we have quiesced the group because it will +- remain closed for them either because of the closed flag in __g_signals +- or the later update to __g1_start. New waiters will never arrive here +- but instead continue to go into the still current G2. */ +- unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0); +- while ((r >> 1) > 0) +- { +- for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--) +- { +- /* TODO Back off. */ +- r = atomic_load_relaxed (cond->__data.__g_refs + g1); +- } +- if ((r >> 1) > 0) +- { +- /* There is still a waiter after spinning. Set the wake-request +- flag and block. Relaxed MO is fine because this is just about +- this futex word. 
+- +- Update r to include the set wake-request flag so that the upcoming +- futex_wait only blocks if the flag is still set (otherwise, we'd +- violate the basic client-side futex protocol). */ +- r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1; +- +- if ((r >> 1) > 0) +- futex_wait_simple (cond->__data.__g_refs + g1, r, private); +- /* Reload here so we eventually see the most recent value even if we +- do not spin. */ +- r = atomic_load_relaxed (cond->__data.__g_refs + g1); +- } +- } +- /* Acquire MO so that we synchronize with the release operation that waiters +- use to decrement __g_refs and thus happen after the waiters we waited +- for. */ +- atomic_thread_fence_acquire (); ++ * Waiters in G1 will be closed out immediately by the advancing of ++ __g_signals to the next "lowseq" (low 31 bits of the new g1_start), ++ which will prevent waiters from blocking using a futex on ++ __g_signals since it provides enough signals for all possible ++ remaining waiters. As a result, they can each consume a signal ++ and they will eventually remove their group reference. */ + + /* Update __g1_start, which finishes closing this group. The value we add + will never be negative because old_orig_size can only be zero when we + switch groups the first time after a condvar was initialized, in which +- case G1 will be at index 1 and we will add a value of 1. See above for +- why this takes place after waiting for quiescence of the group. ++ case G1 will be at index 1 and we will add a value of 1. + Relaxed MO is fine because the change comes with no additional + constraints that others would have to observe. */ + __condvar_add_g1_start_relaxed (cond, + (old_orig_size << 1) + (g1 == 1 ? 1 : - 1)); + +- /* Now reopen the group, thus enabling waiters to again block using the +- futex controlled by __g_signals. Release MO so that observers that see +- no signals (and thus can block) also see the write __g1_start and thus +- that this is now a new group (see __pthread_cond_wait_common for the +- matching acquire MO loads). */ +- atomic_store_release (cond->__data.__g_signals + g1, 0); ++ unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U; ++ ++ /* If any waiters still hold group references (and thus could be blocked), ++ then wake them all up now and prevent any running ones from blocking. ++ This is effectively a catch-all for any possible current or future ++ bugs that can allow the group size to reach 0 before all G1 waiters ++ have been awakened or at least given signals to consume, or any ++ other case that can leave blocked (or about to block) older waiters.. */ ++ if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0) ++ { ++ /* First advance signals to the end of the group (i.e. enough signals ++ for the entire G1 group) to ensure that waiters which have not ++ yet blocked in the futex will not block. ++ Note that in the vast majority of cases, this should never ++ actually be necessary, since __g_signals will have enough ++ signals for the remaining g_refs waiters. As an optimization, ++ we could check this first before proceeding, although that ++ could still leave the potential for futex lost wakeup bugs ++ if the signal count was non-zero but the futex wakeup ++ was somehow lost. */ ++ atomic_store_release (cond->__data.__g_signals + g1, lowseq); ++ ++ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); ++ } + + /* At this point, the old G1 is now a valid new G2 (but not in use yet). 
+ No old waiter can neither grab a signal nor acquire a reference without +@@ -311,6 +272,10 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, + g1 ^= 1; + *g1index ^= 1; + ++ /* Now advance the new G1 g_signals to the new lowseq, giving it ++ an effective signal count of 0 to start. */ ++ atomic_store_release (cond->__data.__g_signals + g1, lowseq); ++ + /* These values are just observed by signalers, and thus protected by the + lock. */ + unsigned int orig_size = wseq - (old_g1_start + old_orig_size); +diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c +index 806c432d13..235813f685 100644 +--- a/nptl/pthread_cond_wait.c ++++ b/nptl/pthread_cond_wait.c +@@ -238,9 +238,7 @@ __condvar_cleanup_waiting (void *arg) + signaled), and a reference count. + + The group reference count is used to maintain the number of waiters that +- are using the group's futex. Before a group can change its role, the +- reference count must show that no waiters are using the futex anymore; this +- prevents ABA issues on the futex word. ++ are using the group's futex. + + To represent which intervals in the waiter sequence the groups cover (and + thus also which group slot contains G1 or G2), we use a 64b counter to +@@ -300,11 +298,12 @@ __condvar_cleanup_waiting (void *arg) + last reference. + * Reference count used by waiters concurrently with signalers that have + acquired the condvar-internal lock. +- __g_signals: The number of signals that can still be consumed. ++ __g_signals: The number of signals that can still be consumed, relative to ++ the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits ++ 31 to 1 of g1_start with the signal count added) + * Used as a futex word by waiters. Used concurrently by waiters and + signalers. +- * LSB is true iff this group has been completely signaled (i.e., it is +- closed). ++ * LSB is currently reserved and 0. + __g_size: Waiters remaining in this group (i.e., which have not been + signaled yet. + * Accessed by signalers and waiters that cancel waiting (both do so only +@@ -328,18 +327,6 @@ __condvar_cleanup_waiting (void *arg) + sufficient because if a waiter can see a sufficiently large value, it could + have also consume a signal in the waiters group. + +- Waiters try to grab a signal from __g_signals without holding a reference +- count, which can lead to stealing a signal from a more recent group after +- their own group was already closed. They cannot always detect whether they +- in fact did because they do not know when they stole, but they can +- conservatively add a signal back to the group they stole from; if they +- did so unnecessarily, all that happens is a spurious wake-up. To make this +- even less likely, __g1_start contains the index of the current g2 too, +- which allows waiters to check if there aliasing on the group slots; if +- there wasn't, they didn't steal from the current G1, which means that the +- G1 they stole from must have been already closed and they do not need to +- fix anything. +- + It is essential that the last field in pthread_cond_t is __g_signals[1]: + The previous condvar used a pointer-sized field in pthread_cond_t, so a + PTHREAD_COND_INITIALIZER from that condvar implementation might only +@@ -435,6 +422,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + { + while (1) + { ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); ++ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ + /* Spin-wait first. 
+ Note that spinning first without checking whether a timeout + passed might lead to what looks like a spurious wake-up even +@@ -446,35 +436,45 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + having to compare against the current time seems to be the right + choice from a performance perspective for most use cases. */ + unsigned int spin = maxspin; +- while (signals == 0 && spin > 0) ++ while (spin > 0 && ((int)(signals - lowseq) < 2)) + { + /* Check that we are not spinning on a group that's already + closed. */ +- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) +- goto done; ++ if (seq < (g1_start >> 1)) ++ break; + + /* TODO Back off. */ + + /* Reload signals. See above for MO. */ + signals = atomic_load_acquire (cond->__data.__g_signals + g); ++ g1_start = __condvar_load_g1_start_relaxed (cond); ++ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; + spin--; + } + +- /* If our group will be closed as indicated by the flag on signals, +- don't bother grabbing a signal. */ +- if (signals & 1) +- goto done; +- +- /* If there is an available signal, don't block. */ +- if (signals != 0) ++ if (seq < (g1_start >> 1)) ++ { ++ /* If the group is closed already, ++ then this waiter originally had enough extra signals to ++ consume, up until the time its group was closed. */ ++ goto done; ++ } ++ ++ /* If there is an available signal, don't block. ++ If __g1_start has advanced at all, then we must be in G1 ++ by now, perhaps in the process of switching back to an older ++ G2, but in either case we're allowed to consume the available ++ signal and should not block anymore. */ ++ if ((int)(signals - lowseq) >= 2) + break; + + /* No signals available after spinning, so prepare to block. + We first acquire a group reference and use acquire MO for that so + that we synchronize with the dummy read-modify-write in + __condvar_quiesce_and_switch_g1 if we read from that. In turn, +- in this case this will make us see the closed flag on __g_signals +- that designates a concurrent attempt to reuse the group's slot. ++ in this case this will make us see the advancement of __g_signals ++ to the upcoming new g1_start that occurs with a concurrent ++ attempt to reuse the group's slot. + We use acquire MO for the __g_signals check to make the + __g1_start check work (see spinning above). + Note that the group reference acquisition will not mask the +@@ -482,15 +482,24 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + an atomic read-modify-write operation and thus extend the release + sequence. */ + atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); +- if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) +- || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) ++ signals = atomic_load_acquire (cond->__data.__g_signals + g); ++ g1_start = __condvar_load_g1_start_relaxed (cond); ++ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U; ++ ++ if (seq < (g1_start >> 1)) + { +- /* Our group is closed. Wake up any signalers that might be +- waiting. */ ++ /* group is closed already, so don't block */ + __condvar_dec_grefs (cond, g, private); + goto done; + } + ++ if ((int)(signals - lowseq) >= 2) ++ { ++ /* a signal showed up or G1/G2 switched after we grabbed the refcount */ ++ __condvar_dec_grefs (cond, g, private); ++ break; ++ } ++ + // Now block. 
+ struct _pthread_cleanup_buffer buffer; + struct _condvar_cleanup_buffer cbuffer; +@@ -501,7 +510,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); + + err = __futex_abstimed_wait_cancelable64 ( +- cond->__data.__g_signals + g, 0, clockid, abstime, private); ++ cond->__data.__g_signals + g, signals, clockid, abstime, private); + + __pthread_cleanup_pop (&buffer, 0); + +@@ -524,6 +533,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + signals = atomic_load_acquire (cond->__data.__g_signals + g); + } + ++ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) ++ goto done; + } + /* Try to grab a signal. Use acquire MO so that we see an up-to-date value + of __g1_start below (see spinning above for a similar case). In +@@ -532,69 +543,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, + while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, + &signals, signals - 2)); + +- /* We consumed a signal but we could have consumed from a more recent group +- that aliased with ours due to being in the same group slot. If this +- might be the case our group must be closed as visible through +- __g1_start. */ +- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); +- if (seq < (g1_start >> 1)) +- { +- /* We potentially stole a signal from a more recent group but we do not +- know which group we really consumed from. +- We do not care about groups older than current G1 because they are +- closed; we could have stolen from these, but then we just add a +- spurious wake-up for the current groups. +- We will never steal a signal from current G2 that was really intended +- for G2 because G2 never receives signals (until it becomes G1). We +- could have stolen a signal from G2 that was conservatively added by a +- previous waiter that also thought it stole a signal -- but given that +- that signal was added unnecessarily, it's not a problem if we steal +- it. +- Thus, the remaining case is that we could have stolen from the current +- G1, where "current" means the __g1_start value we observed. However, +- if the current G1 does not have the same slot index as we do, we did +- not steal from it and do not need to undo that. This is the reason +- for putting a bit with G2's index into__g1_start as well. */ +- if (((g1_start & 1) ^ 1) == g) +- { +- /* We have to conservatively undo our potential mistake of stealing +- a signal. We can stop trying to do that when the current G1 +- changes because other spinning waiters will notice this too and +- __condvar_quiesce_and_switch_g1 has checked that there are no +- futex waiters anymore before switching G1. +- Relaxed MO is fine for the __g1_start load because we need to +- merely be able to observe this fact and not have to observe +- something else as well. +- ??? Would it help to spin for a little while to see whether the +- current G1 gets closed? This might be worthwhile if the group is +- small or close to being closed. */ +- unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); +- while (__condvar_load_g1_start_relaxed (cond) == g1_start) +- { +- /* Try to add a signal. We don't need to acquire the lock +- because at worst we can cause a spurious wake-up. If the +- group is in the process of being closed (LSB is true), this +- has an effect similar to us adding a signal. 
*/ +- if (((s & 1) != 0) +- || atomic_compare_exchange_weak_relaxed +- (cond->__data.__g_signals + g, &s, s + 2)) +- { +- /* If we added a signal, we also need to add a wake-up on +- the futex. We also need to do that if we skipped adding +- a signal because the group is being closed because +- while __condvar_quiesce_and_switch_g1 could have closed +- the group, it might still be waiting for futex waiters to +- leave (and one of those waiters might be the one we stole +- the signal from, which cause it to block using the +- futex). */ +- futex_wake (cond->__data.__g_signals + g, 1, private); +- break; +- } +- /* TODO Back off. */ +- } +- } +- } +- + done: + + /* Confirm that we have been woken. We do that before acquiring the mutex +-- +2.27.0 + -- Gitee
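
Background sketch (not part of the patch series above, and not glibc code): a
minimal standalone C program illustrating the wrap-around-safe "relative signal
count" test that the backported condvar code relies on, i.e. the pattern
(int) (signals - (unsigned int) g1_start) > 0 used in the patched
pthread_cond_wait. The variable names below are local illustrations only; the
64-bit "group closed" check (seq < g1_start) mentioned in the commit messages
is only referenced in comments here.

  #include <stdint.h>
  #include <stdio.h>

  /* Return nonzero iff at least one signal is available, given a 32-bit
     signals word kept relative to the low 32 bits of the 64-bit group
     start.  The signed difference of unsigned values stays correct even
     when the 32-bit quantities wrap around; full wrap-around of the
     counter itself is caught separately by the 64-bit seq < g1_start
     check in the real implementation.  */
  static int
  signal_available (unsigned int signals, uint64_t g1_start)
  {
    return (int) (signals - (unsigned int) g1_start) > 0;
  }

  int
  main (void)
  {
    uint64_t g1_start = 0x1fffffffeULL;   /* low 32 bits: 0xfffffffe */

    /* No signal yet: signals equals the low bits of g1_start.  */
    printf ("%d\n", signal_available (0xfffffffeu, g1_start));  /* 0 */
    /* One signal has been added by a signaler.  */
    printf ("%d\n", signal_available (0xffffffffu, g1_start));  /* 1 */
    /* Signals word has wrapped past zero but is still "ahead".  */
    printf ("%d\n", signal_available (0x00000001u, g1_start));  /* 1 */
    return 0;
  }

Because the comparison is relative, a waiter that observes a stale signals
word can never mistake an old group's count for a fresh signal: after a G1/G2
switch the low bits of g1_start move, so the compare-and-exchange on the
signals word no longer matches, which is what lets the signal-stealing
recovery code be removed in the patches above.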